feat: add crawl and search ui
This commit is contained in:
@@ -335,4 +335,81 @@ export const deleteSchedule = async (id: string): Promise<boolean> => {
|
|||||||
console.log(error);
|
console.log(error);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Creates a crawl robot by POSTing the starting URL, the robot name and the
 * crawl settings to `${apiUrl}/recordings/crawl` (JSON body, credentials sent).
 *
 * @param url - Starting URL, forwarded as-is in the request body.
 * @param name - Robot name, forwarded as-is in the request body.
 * @param crawlConfig - Crawl settings, forwarded unchanged.
 * @returns The response body when the server replies with status 201;
 *          `null` for any other status or when the request throws.
 */
export const createCrawlRobot = async (
  url: string,
  name: string,
  crawlConfig: {
    mode: 'domain' | 'subdomain' | 'path';
    limit: number;
    maxDepth: number;
    includePaths: string[];
    excludePaths: string[];
    useSitemap: boolean;
    followLinks: boolean;
    respectRobots: boolean;
  }
): Promise<any> => {
  try {
    const payload = { url, name, crawlConfig };
    const requestOptions = {
      headers: { 'Content-Type': 'application/json' },
      withCredentials: true,
    };

    const response = await axios.post(
      `${apiUrl}/recordings/crawl`,
      payload,
      requestOptions
    );

    // Only 201 counts as success; every other status is routed through the
    // catch below so the caller always sees `null` on failure.
    if (response.status !== 201) {
      throw new Error('Failed to create crawl robot');
    }
    return response.data;
  } catch (error: any) {
    console.error('Error creating crawl robot:', error);
    return null;
  }
};
|
||||||
|
|
||||||
|
/**
 * Creates a search robot by POSTing the robot name and the search settings
 * to `${apiUrl}/recordings/search` (JSON body, credentials sent).
 *
 * @param name - Robot name, forwarded as-is in the request body.
 * @param searchConfig - Search settings, forwarded unchanged.
 * @returns The response body when the server replies with status 201;
 *          `null` for any other status or when the request throws.
 */
export const createSearchRobot = async (
  name: string,
  searchConfig: {
    query: string;
    limit: number;
    provider: 'google' | 'bing' | 'duckduckgo';
    filters?: {
      timeRange?: 'day' | 'week' | 'month' | 'year';
      location?: string;
      lang?: string;
    };
    mode: 'discover' | 'scrape';
  }
): Promise<any> => {
  try {
    const payload = { name, searchConfig };
    const requestOptions = {
      headers: { 'Content-Type': 'application/json' },
      withCredentials: true,
    };

    const response = await axios.post(
      `${apiUrl}/recordings/search`,
      payload,
      requestOptions
    );

    // Only 201 counts as success; every other status is routed through the
    // catch below so the caller always sees `null` on failure.
    if (response.status !== 201) {
      throw new Error('Failed to create search robot');
    }
    return response.data;
  } catch (error: any) {
    console.error('Error creating search robot:', error);
    return null;
  }
};
|
||||||
@@ -17,12 +17,14 @@ import {
|
|||||||
FormControl,
|
FormControl,
|
||||||
Select,
|
Select,
|
||||||
MenuItem,
|
MenuItem,
|
||||||
InputLabel
|
InputLabel,
|
||||||
|
Collapse,
|
||||||
|
FormControlLabel
|
||||||
} from '@mui/material';
|
} from '@mui/material';
|
||||||
import { ArrowBack, AutoAwesome, HighlightAlt } from '@mui/icons-material';
|
import { ArrowBack, AutoAwesome, HighlightAlt } from '@mui/icons-material';
|
||||||
import { useGlobalInfoStore, useCacheInvalidation } from '../../../context/globalInfo';
|
import { useGlobalInfoStore, useCacheInvalidation } from '../../../context/globalInfo';
|
||||||
import { canCreateBrowserInState, getActiveBrowserId, stopRecording } from '../../../api/recording';
|
import { canCreateBrowserInState, getActiveBrowserId, stopRecording } from '../../../api/recording';
|
||||||
import { createScrapeRobot, createLLMRobot, createAndRunRecording } from "../../../api/storage";
|
import { createScrapeRobot, createLLMRobot, createAndRunRecording, createCrawlRobot, createSearchRobot } from "../../../api/storage";
|
||||||
import { AuthContext } from '../../../context/auth';
|
import { AuthContext } from '../../../context/auth';
|
||||||
import { GenericModal } from '../../ui/GenericModal';
|
import { GenericModal } from '../../ui/GenericModal';
|
||||||
|
|
||||||
@@ -72,6 +74,25 @@ const RobotCreate: React.FC = () => {
|
|||||||
const [llmBaseUrl, setLlmBaseUrl] = useState('');
|
const [llmBaseUrl, setLlmBaseUrl] = useState('');
|
||||||
const [aiRobotName, setAiRobotName] = useState('');
|
const [aiRobotName, setAiRobotName] = useState('');
|
||||||
|
|
||||||
|
const [crawlRobotName, setCrawlRobotName] = useState('');
|
||||||
|
const [crawlUrl, setCrawlUrl] = useState('');
|
||||||
|
const [crawlMode, setCrawlMode] = useState<'domain' | 'subdomain' | 'path'>('domain');
|
||||||
|
const [crawlLimit, setCrawlLimit] = useState(50);
|
||||||
|
const [crawlMaxDepth, setCrawlMaxDepth] = useState(3);
|
||||||
|
const [crawlIncludePaths, setCrawlIncludePaths] = useState<string>('');
|
||||||
|
const [crawlExcludePaths, setCrawlExcludePaths] = useState<string>('');
|
||||||
|
const [crawlUseSitemap, setCrawlUseSitemap] = useState(true);
|
||||||
|
const [crawlFollowLinks, setCrawlFollowLinks] = useState(true);
|
||||||
|
const [crawlRespectRobots, setCrawlRespectRobots] = useState(true);
|
||||||
|
const [showCrawlAdvanced, setShowCrawlAdvanced] = useState(false);
|
||||||
|
|
||||||
|
const [searchRobotName, setSearchRobotName] = useState('');
|
||||||
|
const [searchQuery, setSearchQuery] = useState('');
|
||||||
|
const [searchLimit, setSearchLimit] = useState(10);
|
||||||
|
const [searchProvider] = useState<'duckduckgo'>('duckduckgo');
|
||||||
|
const [searchMode, setSearchMode] = useState<'discover' | 'scrape'>('discover');
|
||||||
|
const [searchTimeRange, setSearchTimeRange] = useState<'day' | 'week' | 'month' | 'year' | ''>('');
|
||||||
|
|
||||||
const { state } = React.useContext(AuthContext);
|
const { state } = React.useContext(AuthContext);
|
||||||
const { user } = state;
|
const { user } = state;
|
||||||
const { addOptimisticRobot, removeOptimisticRobot, invalidateRecordings, invalidateRuns, addOptimisticRun } = useCacheInvalidation();
|
const { addOptimisticRobot, removeOptimisticRobot, invalidateRecordings, invalidateRuns, addOptimisticRun } = useCacheInvalidation();
|
||||||
@@ -155,6 +176,76 @@ const RobotCreate: React.FC = () => {
|
|||||||
navigate('/robots');
|
navigate('/robots');
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Submit handler for the "Crawl" tab.
 *
 * Rejects an empty URL or robot name with an error notification, otherwise
 * builds the crawl configuration from the form state and calls
 * `createCrawlRobot`. On a truthy result the recordings cache is invalidated,
 * a success notification is shown and the user is sent to `/robots`; on a
 * falsy result an error notification is shown.
 */
const handleCreateCrawlRobot = async () => {
  if (!crawlUrl.trim()) {
    notify('error', 'Please enter a valid URL');
    return;
  }
  if (!crawlRobotName.trim()) {
    notify('error', 'Please enter a robot name');
    return;
  }

  // Turns a comma-separated field into a list of trimmed paths. Blank entries
  // (from "a,,b", a trailing comma, or a whitespace-only field) are dropped so
  // that no empty-string path filter is sent to the backend.
  const toPathList = (raw: string): string[] =>
    raw
      .split(',')
      .map((p) => p.trim())
      .filter((p) => p.length > 0);

  setIsLoading(true);
  const result = await createCrawlRobot(
    crawlUrl,
    crawlRobotName,
    {
      mode: crawlMode,
      limit: crawlLimit,
      maxDepth: crawlMaxDepth,
      includePaths: toPathList(crawlIncludePaths),
      excludePaths: toPathList(crawlExcludePaths),
      useSitemap: crawlUseSitemap,
      followLinks: crawlFollowLinks,
      respectRobots: crawlRespectRobots
    }
  );
  setIsLoading(false);

  if (result) {
    invalidateRecordings();
    notify('success', `${crawlRobotName} created successfully!`);
    navigate('/robots');
  } else {
    notify('error', 'Failed to create crawl robot');
  }
};
|
||||||
|
|
||||||
|
/**
 * Submit handler for the "Search" tab.
 *
 * Requires a non-blank query and robot name (the query is checked first),
 * then calls `createSearchRobot` with the form state. A truthy result
 * invalidates the recordings cache, shows a success notification and
 * navigates to `/robots`; a falsy result shows an error notification.
 */
const handleCreateSearchRobot = async () => {
  const queryMissing = searchQuery.trim().length === 0;
  const nameMissing = searchRobotName.trim().length === 0;
  if (queryMissing || nameMissing) {
    notify(
      'error',
      queryMissing ? 'Please enter a search query' : 'Please enter a robot name'
    );
    return;
  }

  // The "No Filter" option is stored as '' in state; send it as undefined.
  const timeRange = searchTimeRange === '' ? undefined : searchTimeRange;

  setIsLoading(true);
  const created = await createSearchRobot(searchRobotName, {
    query: searchQuery,
    limit: searchLimit,
    provider: searchProvider,
    filters: { timeRange },
    mode: searchMode,
  });
  setIsLoading(false);

  if (!created) {
    notify('error', 'Failed to create search robot');
    return;
  }

  invalidateRecordings();
  notify('success', `${searchRobotName} created successfully!`);
  navigate('/robots');
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Container maxWidth="md" sx={{ py: 4 }}>
|
<Container maxWidth="md" sx={{ py: 4 }}>
|
||||||
<Box>
|
<Box>
|
||||||
@@ -210,6 +301,8 @@ const RobotCreate: React.FC = () => {
|
|||||||
>
|
>
|
||||||
<Tab label="Extract" id="extract-robot" aria-controls="extract-robot" />
|
<Tab label="Extract" id="extract-robot" aria-controls="extract-robot" />
|
||||||
<Tab label="Scrape" id="scrape-robot" aria-controls="scrape-robot" />
|
<Tab label="Scrape" id="scrape-robot" aria-controls="scrape-robot" />
|
||||||
|
<Tab label="Crawl" id="crawl-robot" aria-controls="crawl-robot" />
|
||||||
|
<Tab label="Search" id="search-robot" aria-controls="search-robot" />
|
||||||
</Tabs>
|
</Tabs>
|
||||||
</Box>
|
</Box>
|
||||||
|
|
||||||
@@ -697,6 +790,262 @@ const RobotCreate: React.FC = () => {
|
|||||||
</Box>
|
</Box>
|
||||||
</Card>
|
</Card>
|
||||||
</TabPanel>
|
</TabPanel>
|
||||||
|
|
||||||
|
<TabPanel value={tabValue} index={2}>
|
||||||
|
<Card sx={{ mb: 4, p: 4, textAlign: 'center' }}>
|
||||||
|
<Box display="flex" flexDirection="column" alignItems="center">
|
||||||
|
<img
|
||||||
|
src="https://ik.imagekit.io/ys1blv5kv/maxunlogo.png"
|
||||||
|
width={73}
|
||||||
|
height={65}
|
||||||
|
style={{
|
||||||
|
borderRadius: '5px',
|
||||||
|
marginBottom: '30px'
|
||||||
|
}}
|
||||||
|
alt="Maxun Logo"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<Typography variant="body2" color="text.secondary" mb={3}>
|
||||||
|
Crawl entire websites and gather data from multiple pages automatically.
|
||||||
|
</Typography>
|
||||||
|
|
||||||
|
<Box sx={{ width: '100%', maxWidth: 700, mb: 2 }}>
|
||||||
|
<TextField
|
||||||
|
label="Robot Name"
|
||||||
|
placeholder="Example: YC Companies Crawler"
|
||||||
|
fullWidth
|
||||||
|
value={crawlRobotName}
|
||||||
|
onChange={(e) => setCrawlRobotName(e.target.value)}
|
||||||
|
sx={{ mb: 2 }}
|
||||||
|
/>
|
||||||
|
<TextField
|
||||||
|
label="Starting URL"
|
||||||
|
placeholder="https://www.ycombinator.com/companies"
|
||||||
|
fullWidth
|
||||||
|
value={crawlUrl}
|
||||||
|
onChange={(e) => setCrawlUrl(e.target.value)}
|
||||||
|
sx={{ mb: 2 }}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<TextField
|
||||||
|
label="Max Pages to Crawl"
|
||||||
|
type="number"
|
||||||
|
fullWidth
|
||||||
|
value={crawlLimit}
|
||||||
|
onChange={(e) => setCrawlLimit(parseInt(e.target.value) || 10)}
|
||||||
|
sx={{ mb: 2 }}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<Box sx={{ width: '100%', display: 'flex', justifyContent: 'flex-start', mb: 2 }}>
|
||||||
|
<Button
|
||||||
|
onClick={() => setShowCrawlAdvanced(!showCrawlAdvanced)}
|
||||||
|
sx={{
|
||||||
|
textTransform: 'none',
|
||||||
|
color: '#ff00c3',
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{showCrawlAdvanced ? 'Hide Advanced Options' : 'Advanced Options'}
|
||||||
|
</Button>
|
||||||
|
</Box>
|
||||||
|
|
||||||
|
<Collapse in={showCrawlAdvanced}>
|
||||||
|
<Box sx={{ mb: 2 }}>
|
||||||
|
<FormControl fullWidth sx={{ mb: 2 }}>
|
||||||
|
<InputLabel>Crawl Scope</InputLabel>
|
||||||
|
<Select
|
||||||
|
value={crawlMode}
|
||||||
|
label="Crawl Scope"
|
||||||
|
onChange={(e) => setCrawlMode(e.target.value as any)}
|
||||||
|
>
|
||||||
|
<MenuItem value="domain">Same Domain Only</MenuItem>
|
||||||
|
<MenuItem value="subdomain">Include Subdomains</MenuItem>
|
||||||
|
<MenuItem value="path">Specific Path Only</MenuItem>
|
||||||
|
</Select>
|
||||||
|
</FormControl>
|
||||||
|
|
||||||
|
<TextField
|
||||||
|
label="Max Depth"
|
||||||
|
type="number"
|
||||||
|
fullWidth
|
||||||
|
value={crawlMaxDepth}
|
||||||
|
onChange={(e) => setCrawlMaxDepth(parseInt(e.target.value) || 3)}
|
||||||
|
sx={{ mb: 2 }}
|
||||||
|
helperText="How many links deep to follow (default: 3)"
|
||||||
|
FormHelperTextProps={{ sx: { ml: 0 } }}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<TextField
|
||||||
|
label="Include Paths"
|
||||||
|
placeholder="Example: /products, /blog"
|
||||||
|
fullWidth
|
||||||
|
value={crawlIncludePaths}
|
||||||
|
onChange={(e) => setCrawlIncludePaths(e.target.value)}
|
||||||
|
sx={{ mb: 2 }}
|
||||||
|
helperText="Only crawl URLs matching these paths (comma-separated)"
|
||||||
|
FormHelperTextProps={{ sx: { ml: 0 } }}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<TextField
|
||||||
|
label="Exclude Paths"
|
||||||
|
placeholder="Example: /admin, /login"
|
||||||
|
fullWidth
|
||||||
|
value={crawlExcludePaths}
|
||||||
|
onChange={(e) => setCrawlExcludePaths(e.target.value)}
|
||||||
|
sx={{ mb: 2 }}
|
||||||
|
helperText="Skip URLs matching these paths (comma-separated)"
|
||||||
|
FormHelperTextProps={{ sx: { ml: 0 } }}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<Box sx={{ display: 'flex', flexDirection: 'column', gap: 1 }}>
|
||||||
|
<FormControlLabel
|
||||||
|
control={
|
||||||
|
<Checkbox
|
||||||
|
checked={crawlUseSitemap}
|
||||||
|
onChange={(e) => setCrawlUseSitemap(e.target.checked)}
|
||||||
|
/>
|
||||||
|
}
|
||||||
|
label="Use sitemap.xml for URL discovery"
|
||||||
|
/>
|
||||||
|
<FormControlLabel
|
||||||
|
control={
|
||||||
|
<Checkbox
|
||||||
|
checked={crawlFollowLinks}
|
||||||
|
onChange={(e) => setCrawlFollowLinks(e.target.checked)}
|
||||||
|
/>
|
||||||
|
}
|
||||||
|
label="Follow links on pages"
|
||||||
|
/>
|
||||||
|
<FormControlLabel
|
||||||
|
control={
|
||||||
|
<Checkbox
|
||||||
|
checked={crawlRespectRobots}
|
||||||
|
onChange={(e) => setCrawlRespectRobots(e.target.checked)}
|
||||||
|
/>
|
||||||
|
}
|
||||||
|
label="Respect robots.txt"
|
||||||
|
/>
|
||||||
|
</Box>
|
||||||
|
</Box>
|
||||||
|
</Collapse>
|
||||||
|
</Box>
|
||||||
|
|
||||||
|
<Button
|
||||||
|
variant="contained"
|
||||||
|
fullWidth
|
||||||
|
onClick={handleCreateCrawlRobot}
|
||||||
|
disabled={!crawlUrl.trim() || !crawlRobotName.trim() || isLoading}
|
||||||
|
sx={{
|
||||||
|
bgcolor: '#ff00c3',
|
||||||
|
py: 1.4,
|
||||||
|
fontSize: '1rem',
|
||||||
|
textTransform: 'none',
|
||||||
|
maxWidth: 700,
|
||||||
|
borderRadius: 2
|
||||||
|
}}
|
||||||
|
startIcon={isLoading ? <CircularProgress size={20} color="inherit" /> : null}
|
||||||
|
>
|
||||||
|
{isLoading ? 'Creating...' : 'Create Robot'}
|
||||||
|
</Button>
|
||||||
|
</Box>
|
||||||
|
</Card>
|
||||||
|
</TabPanel>
|
||||||
|
|
||||||
|
<TabPanel value={tabValue} index={3}>
|
||||||
|
<Card sx={{ mb: 4, p: 4, textAlign: 'center' }}>
|
||||||
|
<Box display="flex" flexDirection="column" alignItems="center">
|
||||||
|
<img
|
||||||
|
src="https://ik.imagekit.io/ys1blv5kv/maxunlogo.png"
|
||||||
|
width={73}
|
||||||
|
height={65}
|
||||||
|
style={{
|
||||||
|
borderRadius: '5px',
|
||||||
|
marginBottom: '30px'
|
||||||
|
}}
|
||||||
|
alt="Maxun Logo"
|
||||||
|
/>
|
||||||
|
|
||||||
|
<Typography variant="body2" color="text.secondary" mb={3}>
|
||||||
|
Search the web and gather data from relevant results.
|
||||||
|
</Typography>
|
||||||
|
|
||||||
|
<Box sx={{ width: '100%', maxWidth: 700, mb: 2 }}>
|
||||||
|
<TextField
|
||||||
|
label="Robot Name"
|
||||||
|
placeholder="Example: AI News Monitor"
|
||||||
|
fullWidth
|
||||||
|
value={searchRobotName}
|
||||||
|
onChange={(e) => setSearchRobotName(e.target.value)}
|
||||||
|
sx={{ mb: 2 }}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<TextField
|
||||||
|
label="Search Query"
|
||||||
|
placeholder="Example: latest AI breakthroughs 2025"
|
||||||
|
fullWidth
|
||||||
|
value={searchQuery}
|
||||||
|
onChange={(e) => setSearchQuery(e.target.value)}
|
||||||
|
sx={{ mb: 2 }}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<TextField
|
||||||
|
label="Number of Results"
|
||||||
|
type="number"
|
||||||
|
fullWidth
|
||||||
|
value={searchLimit}
|
||||||
|
onChange={(e) => setSearchLimit(parseInt(e.target.value) || 10)}
|
||||||
|
sx={{ mb: 2 }}
|
||||||
|
/>
|
||||||
|
|
||||||
|
<Box sx={{ display: 'flex', gap: 2 }}>
|
||||||
|
<FormControl fullWidth sx={{ mb: 2 }}>
|
||||||
|
<InputLabel>Mode</InputLabel>
|
||||||
|
<Select
|
||||||
|
value={searchMode}
|
||||||
|
label="Mode"
|
||||||
|
onChange={(e) => setSearchMode(e.target.value as any)}
|
||||||
|
>
|
||||||
|
<MenuItem value="discover">Discover URLs Only</MenuItem>
|
||||||
|
<MenuItem value="scrape">Extract Data from Results</MenuItem>
|
||||||
|
</Select>
|
||||||
|
</FormControl>
|
||||||
|
|
||||||
|
<FormControl fullWidth sx={{ mb: 2 }}>
|
||||||
|
<InputLabel>Time Range</InputLabel>
|
||||||
|
<Select
|
||||||
|
value={searchTimeRange}
|
||||||
|
label="Time Range"
|
||||||
|
onChange={(e) => setSearchTimeRange(e.target.value as 'day' | 'week' | 'month' | 'year' | '')}
|
||||||
|
>
|
||||||
|
<MenuItem value="">No Filter</MenuItem>
|
||||||
|
<MenuItem value="day">Past 24 Hours</MenuItem>
|
||||||
|
<MenuItem value="week">Past Week</MenuItem>
|
||||||
|
<MenuItem value="month">Past Month</MenuItem>
|
||||||
|
<MenuItem value="year">Past Year</MenuItem>
|
||||||
|
</Select>
|
||||||
|
</FormControl>
|
||||||
|
</Box>
|
||||||
|
</Box>
|
||||||
|
|
||||||
|
<Button
|
||||||
|
variant="contained"
|
||||||
|
fullWidth
|
||||||
|
onClick={handleCreateSearchRobot}
|
||||||
|
disabled={!searchQuery.trim() || !searchRobotName.trim() || isLoading}
|
||||||
|
sx={{
|
||||||
|
bgcolor: '#ff00c3',
|
||||||
|
py: 1.4,
|
||||||
|
fontSize: '1rem',
|
||||||
|
textTransform: 'none',
|
||||||
|
maxWidth: 700,
|
||||||
|
borderRadius: 2
|
||||||
|
}}
|
||||||
|
startIcon={isLoading ? <CircularProgress size={20} color="inherit" /> : null}
|
||||||
|
>
|
||||||
|
{isLoading ? 'Creating...' : 'Create Robot'}
|
||||||
|
</Button>
|
||||||
|
</Box>
|
||||||
|
</Card>
|
||||||
|
</TabPanel>
|
||||||
</Box>
|
</Box>
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ interface RobotMeta {
|
|||||||
pairs: number;
|
pairs: number;
|
||||||
updatedAt: string;
|
updatedAt: string;
|
||||||
params: any[];
|
params: any[];
|
||||||
type?: 'extract' | 'scrape';
|
type?: 'extract' | 'scrape' | 'crawl' | 'search';
|
||||||
url?: string;
|
url?: string;
|
||||||
formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[];
|
formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[];
|
||||||
isLLM?: boolean;
|
isLLM?: boolean;
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import React, { useState, useEffect } from "react";
|
import { useState, useEffect } from "react";
|
||||||
import { useTranslation } from "react-i18next";
|
import { useTranslation } from "react-i18next";
|
||||||
import {
|
import {
|
||||||
TextField,
|
TextField,
|
||||||
@@ -7,7 +7,13 @@ import {
|
|||||||
Button,
|
Button,
|
||||||
IconButton,
|
IconButton,
|
||||||
InputAdornment,
|
InputAdornment,
|
||||||
Divider,
|
FormControl,
|
||||||
|
InputLabel,
|
||||||
|
Select,
|
||||||
|
MenuItem,
|
||||||
|
FormControlLabel,
|
||||||
|
Checkbox,
|
||||||
|
Collapse
|
||||||
} from "@mui/material";
|
} from "@mui/material";
|
||||||
import { Visibility, VisibilityOff } from "@mui/icons-material";
|
import { Visibility, VisibilityOff } from "@mui/icons-material";
|
||||||
import { useGlobalInfoStore } from "../../../context/globalInfo";
|
import { useGlobalInfoStore } from "../../../context/globalInfo";
|
||||||
@@ -24,7 +30,7 @@ interface RobotMeta {
|
|||||||
pairs: number;
|
pairs: number;
|
||||||
updatedAt: string;
|
updatedAt: string;
|
||||||
params: any[];
|
params: any[];
|
||||||
type?: 'extract' | 'scrape';
|
type?: 'extract' | 'scrape' | 'crawl' | 'search';
|
||||||
url?: string;
|
url?: string;
|
||||||
formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[];
|
formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[];
|
||||||
isLLM?: boolean;
|
isLLM?: boolean;
|
||||||
@@ -97,6 +103,25 @@ interface ScrapeListLimit {
|
|||||||
currentLimit: number;
|
currentLimit: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Crawl settings as read from (and edited for) a robot's `crawl` action.
 * Every field is optional because the stored configuration may be partial;
 * the edit form supplies display defaults for missing values.
 */
interface CrawlConfig {
  /** Crawl scope; the edit form offers "domain", "subdomain" and "path". */
  mode?: string;
  /** Maximum number of pages to crawl. */
  limit?: number;
  /** How many links deep to follow. */
  maxDepth?: number;
  /** Only crawl URLs matching these paths. */
  includePaths?: string[];
  /** Skip URLs matching these paths. */
  excludePaths?: string[];
  /** Use sitemap.xml for URL discovery. */
  useSitemap?: boolean;
  /** Follow links found on pages. */
  followLinks?: boolean;
  /** Respect robots.txt. */
  respectRobots?: boolean;
}
|
||||||
|
|
||||||
|
/**
 * Search settings as read from (and edited for) a robot's `search` action.
 * Every field is optional because the stored configuration may be partial;
 * the edit form supplies display defaults for missing values.
 */
interface SearchConfig {
  /** The search query text. */
  query?: string;
  /** Number of results to request. */
  limit?: number;
  /** "discover" = discover URLs only; "scrape" = extract data from results. */
  mode?: 'discover' | 'scrape';
  /** Search provider identifier. */
  provider?: string;
  /** Free-form filter bag; the edit form reads and writes `timeRange` here. */
  filters?: Record<string, any>;
}
|
||||||
|
|
||||||
export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
||||||
const { t } = useTranslation();
|
const { t } = useTranslation();
|
||||||
const navigate = useNavigate();
|
const navigate = useNavigate();
|
||||||
@@ -115,6 +140,9 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
|||||||
[]
|
[]
|
||||||
);
|
);
|
||||||
const [isLoading, setIsLoading] = useState(false);
|
const [isLoading, setIsLoading] = useState(false);
|
||||||
|
const [crawlConfig, setCrawlConfig] = useState<CrawlConfig>({});
|
||||||
|
const [searchConfig, setSearchConfig] = useState<SearchConfig>({});
|
||||||
|
const [showCrawlAdvanced, setShowCrawlAdvanced] = useState(false);
|
||||||
|
|
||||||
const isEmailPattern = (value: string): boolean => {
|
const isEmailPattern = (value: string): boolean => {
|
||||||
return value.includes("@");
|
return value.includes("@");
|
||||||
@@ -163,6 +191,8 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
|||||||
setCredentialGroups(groupCredentialsByType(extractedCredentials));
|
setCredentialGroups(groupCredentialsByType(extractedCredentials));
|
||||||
|
|
||||||
findScrapeListLimits(robot.recording.workflow);
|
findScrapeListLimits(robot.recording.workflow);
|
||||||
|
extractCrawlConfig(robot.recording.workflow);
|
||||||
|
extractSearchConfig(robot.recording.workflow);
|
||||||
}
|
}
|
||||||
}, [robot]);
|
}, [robot]);
|
||||||
|
|
||||||
@@ -195,6 +225,36 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
|||||||
setScrapeListLimits(limits);
|
setScrapeListLimits(limits);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Scans every action of every workflow pair and stores the first argument of
 * each `crawl` action in the `crawlConfig` state, provided that argument is a
 * non-null object. If several `crawl` actions qualify, the last one visited
 * is the one left in state.
 *
 * @param workflow - The robot's workflow pairs; pairs without `what` are skipped.
 */
const extractCrawlConfig = (workflow: WhereWhatPair[]) => {
  for (const pair of workflow) {
    if (!pair.what) continue;

    for (const step of pair.what as any[]) {
      if (step.action !== "crawl") continue;

      const hasArgs = step.args && step.args.length > 0;
      if (!hasArgs) continue;

      const candidate = step.args[0];
      if (candidate && typeof candidate === "object") {
        setCrawlConfig(candidate as CrawlConfig);
      }
    }
  }
};
|
||||||
|
|
||||||
|
/**
 * Scans every action of every workflow pair and stores the first argument of
 * each `search` action in the `searchConfig` state, provided that argument is
 * a non-null object. If several `search` actions qualify, the last one
 * visited is the one left in state.
 *
 * @param workflow - The robot's workflow pairs; pairs without `what` are skipped.
 */
const extractSearchConfig = (workflow: WhereWhatPair[]) => {
  for (const pair of workflow) {
    if (!pair.what) continue;

    for (const step of pair.what as any[]) {
      if (step.action !== "search") continue;

      const hasArgs = step.args && step.args.length > 0;
      if (!hasArgs) continue;

      const candidate = step.args[0];
      if (candidate && typeof candidate === "object") {
        setSearchConfig(candidate as SearchConfig);
      }
    }
  }
};
|
||||||
|
|
||||||
function extractInitialCredentials(workflow: any[]): Credentials {
|
function extractInitialCredentials(workflow: any[]): Credentials {
|
||||||
const credentials: Credentials = {};
|
const credentials: Credentials = {};
|
||||||
|
|
||||||
@@ -475,19 +535,17 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
|||||||
<>
|
<>
|
||||||
{renderCredentialFields(
|
{renderCredentialFields(
|
||||||
credentialGroups.usernames,
|
credentialGroups.usernames,
|
||||||
t("Username"),
|
t("Username")
|
||||||
"text"
|
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{renderCredentialFields(credentialGroups.emails, t("Email"), "text")}
|
{renderCredentialFields(credentialGroups.emails, t("Email"))}
|
||||||
|
|
||||||
{renderCredentialFields(
|
{renderCredentialFields(
|
||||||
credentialGroups.passwords,
|
credentialGroups.passwords,
|
||||||
t("Password"),
|
t("Password")
|
||||||
"password"
|
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{renderCredentialFields(credentialGroups.others, t("Other"), "text")}
|
{renderCredentialFields(credentialGroups.others, t("Other"))}
|
||||||
</>
|
</>
|
||||||
);
|
);
|
||||||
};
|
};
|
||||||
@@ -502,7 +560,6 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
|||||||
</Typography>
|
</Typography>
|
||||||
|
|
||||||
{scrapeListLimits.map((limitInfo, index) => {
|
{scrapeListLimits.map((limitInfo, index) => {
|
||||||
// Get the corresponding scrapeList action to extract its name
|
|
||||||
const scrapeListAction = robot?.recording?.workflow?.[limitInfo.pairIndex]?.what?.[limitInfo.actionIndex];
|
const scrapeListAction = robot?.recording?.workflow?.[limitInfo.pairIndex]?.what?.[limitInfo.actionIndex];
|
||||||
const actionName =
|
const actionName =
|
||||||
scrapeListAction?.name ||
|
scrapeListAction?.name ||
|
||||||
@@ -542,7 +599,6 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
|||||||
const screenshotInputs: JSX.Element[] = [];
|
const screenshotInputs: JSX.Element[] = [];
|
||||||
const listInputs: JSX.Element[] = [];
|
const listInputs: JSX.Element[] = [];
|
||||||
|
|
||||||
let textCount = 0;
|
|
||||||
let screenshotCount = 0;
|
let screenshotCount = 0;
|
||||||
let listCount = 0;
|
let listCount = 0;
|
||||||
|
|
||||||
@@ -683,7 +739,6 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
|||||||
const renderCredentialFields = (
|
const renderCredentialFields = (
|
||||||
selectors: string[],
|
selectors: string[],
|
||||||
headerText: string,
|
headerText: string,
|
||||||
defaultType: "text" | "password" = "text"
|
|
||||||
) => {
|
) => {
|
||||||
if (selectors.length === 0) return null;
|
if (selectors.length === 0) return null;
|
||||||
|
|
||||||
@@ -737,6 +792,193 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
|||||||
return url;
|
return url;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Renders the editable crawl settings (page limit plus a collapsible
 * "advanced" section) for the robot being edited.
 *
 * @returns `null` unless the robot's `recording_meta.type` is `'crawl'`;
 *          otherwise a fragment of form controls bound to `crawlConfig`.
 */
const renderCrawlConfigFields = () => {
  const isCrawlRobot = robot?.recording_meta.type === 'crawl';
  if (!isCrawlRobot) {
    return null;
  }

  // Merges a partial change into the crawl config held in component state.
  const patchCrawlConfig = (patch: CrawlConfig) =>
    setCrawlConfig((prev) => ({ ...prev, ...patch }));

  // Numeric inputs only commit values >= 1; anything else (including the NaN
  // produced by an empty field) leaves the stored value untouched.
  const commitLimit = (raw: string) => {
    const parsed = parseInt(raw, 10);
    if (parsed >= 1) {
      patchCrawlConfig({ limit: parsed });
    }
  };
  const commitMaxDepth = (raw: string) => {
    const parsed = parseInt(raw, 10);
    if (parsed >= 1) {
      patchCrawlConfig({ maxDepth: parsed });
    }
  };

  // Comma-separated text -> trimmed path list (empty text -> empty list).
  // NOTE(review): the text fields below re-render from the joined list on
  // every keystroke, so the typed text is normalised as the user types.
  const splitPaths = (raw: string): string[] =>
    raw ? raw.split(',').map((p) => p.trim()) : [];

  const includePathsText = crawlConfig.includePaths?.join(', ') || '';
  const excludePathsText = crawlConfig.excludePaths?.join(', ') || '';
  const advancedToggleLabel = showCrawlAdvanced
    ? 'Hide Advanced Options'
    : 'Advanced Options';

  return (
    <>
      <TextField
        label="Max Pages to Crawl"
        type="number"
        fullWidth
        value={crawlConfig.limit || 10}
        onChange={(e) => commitLimit(e.target.value)}
        inputProps={{ min: 1 }}
        style={{ marginBottom: "20px" }}
      />

      <Button
        onClick={() => setShowCrawlAdvanced(!showCrawlAdvanced)}
        sx={{
          mb: 2,
          textTransform: 'none',
          color: '#ff00c3'
        }}
      >
        {advancedToggleLabel}
      </Button>

      <Collapse in={showCrawlAdvanced}>
        <Box sx={{ mb: 2 }}>
          <FormControl fullWidth sx={{ mb: 2 }}>
            <InputLabel>Crawl Scope</InputLabel>
            <Select
              value={crawlConfig.mode || 'domain'}
              label="Crawl Scope"
              onChange={(e) => patchCrawlConfig({ mode: e.target.value })}
            >
              <MenuItem value="domain">Same Domain Only</MenuItem>
              <MenuItem value="subdomain">Include Subdomains</MenuItem>
              <MenuItem value="path">Specific Path Only</MenuItem>
            </Select>
          </FormControl>

          <TextField
            label="Max Depth"
            type="number"
            fullWidth
            value={crawlConfig.maxDepth || 3}
            onChange={(e) => commitMaxDepth(e.target.value)}
            inputProps={{ min: 1 }}
            sx={{ mb: 2 }}
            helperText="How many links deep to follow (default: 3)"
          />

          <TextField
            label="Include Paths"
            placeholder="Example: /products, /blog"
            fullWidth
            value={includePathsText}
            onChange={(e) => patchCrawlConfig({ includePaths: splitPaths(e.target.value) })}
            sx={{ mb: 2 }}
            helperText="Only crawl URLs matching these paths (comma-separated)"
          />

          <TextField
            label="Exclude Paths"
            placeholder="Example: /admin, /login"
            fullWidth
            value={excludePathsText}
            onChange={(e) => patchCrawlConfig({ excludePaths: splitPaths(e.target.value) })}
            sx={{ mb: 2 }}
            helperText="Skip URLs matching these paths (comma-separated)"
          />

          <Box sx={{ display: 'flex', flexDirection: 'column', gap: 1 }}>
            <FormControlLabel
              control={
                <Checkbox
                  checked={crawlConfig.useSitemap ?? true}
                  onChange={(e) => patchCrawlConfig({ useSitemap: e.target.checked })}
                />
              }
              label="Use sitemap.xml for URL discovery"
            />
            <FormControlLabel
              control={
                <Checkbox
                  checked={crawlConfig.followLinks ?? true}
                  onChange={(e) => patchCrawlConfig({ followLinks: e.target.checked })}
                />
              }
              label="Follow links on pages"
            />
            <FormControlLabel
              control={
                <Checkbox
                  checked={crawlConfig.respectRobots ?? true}
                  onChange={(e) => patchCrawlConfig({ respectRobots: e.target.checked })}
                />
              }
              label="Respect robots.txt"
            />
          </Box>
        </Box>
      </Collapse>
    </>
  );
};
|
||||||
|
|
||||||
|
/**
 * Renders the editable search settings (query, result count, mode and time
 * range) for the robot being edited.
 *
 * @returns `null` unless the robot's `recording_meta.type` is `'search'`;
 *          otherwise a fragment of form controls bound to `searchConfig`.
 */
const renderSearchConfigFields = () => {
  const isSearchRobot = robot?.recording_meta.type === 'search';
  if (!isSearchRobot) {
    return null;
  }

  // Merges a partial change into the search config held in component state.
  const patchSearchConfig = (patch: SearchConfig) =>
    setSearchConfig((prev) => ({ ...prev, ...patch }));

  // Only values >= 1 are committed; anything else (including the NaN
  // produced by an empty field) leaves the stored value untouched.
  const commitLimit = (raw: string) => {
    const parsed = parseInt(raw, 10);
    if (parsed >= 1) {
      patchSearchConfig({ limit: parsed });
    }
  };

  // The "No Filter" option has value ''; it is stored as undefined while the
  // other keys already present in `filters` are preserved.
  const commitTimeRange = (raw: string) =>
    setSearchConfig((prev) => ({
      ...prev,
      filters: { ...prev.filters, timeRange: raw || undefined }
    }));

  return (
    <>
      <TextField
        label="Search Query"
        placeholder="Example: latest AI breakthroughs 2025"
        fullWidth
        value={searchConfig.query || ''}
        onChange={(e) => patchSearchConfig({ query: e.target.value })}
        sx={{ mb: 2 }}
      />

      <TextField
        label="Number of Results"
        type="number"
        fullWidth
        value={searchConfig.limit || 10}
        onChange={(e) => commitLimit(e.target.value)}
        inputProps={{ min: 1 }}
        sx={{ mb: 2 }}
      />

      <FormControl fullWidth sx={{ mb: 2 }}>
        <InputLabel>Mode</InputLabel>
        <Select
          value={searchConfig.mode || 'discover'}
          label="Mode"
          onChange={(e) => patchSearchConfig({ mode: e.target.value as 'discover' | 'scrape' })}
        >
          <MenuItem value="discover">Discover URLs Only</MenuItem>
          <MenuItem value="scrape">Extract Data from Results</MenuItem>
        </Select>
      </FormControl>

      <FormControl fullWidth sx={{ mb: 2 }}>
        <InputLabel>Time Range</InputLabel>
        <Select
          value={searchConfig.filters?.timeRange || ''}
          label="Time Range"
          onChange={(e) => commitTimeRange(e.target.value)}
        >
          <MenuItem value="">No Filter</MenuItem>
          <MenuItem value="day">Past 24 Hours</MenuItem>
          <MenuItem value="week">Past Week</MenuItem>
          <MenuItem value="month">Past Month</MenuItem>
          <MenuItem value="year">Past Year</MenuItem>
        </Select>
      </FormControl>
    </>
  );
};
|
||||||
|
|
||||||
const handleSave = async () => {
|
const handleSave = async () => {
|
||||||
if (!robot) return;
|
if (!robot) return;
|
||||||
|
|
||||||
@@ -757,6 +999,48 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
|||||||
|
|
||||||
const targetUrl = getTargetUrl();
|
const targetUrl = getTargetUrl();
|
||||||
|
|
||||||
|
let updatedWorkflow = robot.recording.workflow;
|
||||||
|
if (robot.recording_meta.type === 'crawl') {
|
||||||
|
updatedWorkflow = updatedWorkflow.map((pair: any) => {
|
||||||
|
if (!pair.what) return pair;
|
||||||
|
|
||||||
|
return {
|
||||||
|
...pair,
|
||||||
|
what: pair.what.map((action: any) => {
|
||||||
|
if (action.action === 'crawl') {
|
||||||
|
return {
|
||||||
|
...action,
|
||||||
|
args: [{ ...crawlConfig }]
|
||||||
|
};
|
||||||
|
}
|
||||||
|
return action;
|
||||||
|
})
|
||||||
|
};
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (robot.recording_meta.type === 'search') {
|
||||||
|
updatedWorkflow = updatedWorkflow.map((pair: any) => {
|
||||||
|
if (!pair.what) return pair;
|
||||||
|
|
||||||
|
return {
|
||||||
|
...pair,
|
||||||
|
what: pair.what.map((action: any) => {
|
||||||
|
if (action.action === 'search') {
|
||||||
|
return {
|
||||||
|
...action,
|
||||||
|
args: [{
|
||||||
|
...searchConfig,
|
||||||
|
provider: 'duckduckgo'
|
||||||
|
}]
|
||||||
|
};
|
||||||
|
}
|
||||||
|
return action;
|
||||||
|
})
|
||||||
|
};
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
const payload: any = {
|
const payload: any = {
|
||||||
name: robot.recording_meta.name,
|
name: robot.recording_meta.name,
|
||||||
limits: scrapeListLimits.map((limit) => ({
|
limits: scrapeListLimits.map((limit) => ({
|
||||||
@@ -767,7 +1051,6 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
|||||||
})),
|
})),
|
||||||
credentials: credentialsForPayload,
|
credentials: credentialsForPayload,
|
||||||
targetUrl: targetUrl,
|
targetUrl: targetUrl,
|
||||||
// send the (possibly edited) workflow so backend can persist action name changes
|
|
||||||
workflow: robot.recording.workflow,
|
workflow: robot.recording.workflow,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -825,19 +1108,12 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
|||||||
onChange={(e) => handleTargetUrlChange(e.target.value)}
|
onChange={(e) => handleTargetUrlChange(e.target.value)}
|
||||||
style={{ marginBottom: "20px" }}
|
style={{ marginBottom: "20px" }}
|
||||||
/>
|
/>
|
||||||
{renderScrapeListLimitFields() && (
|
|
||||||
<>
|
{renderCrawlConfigFields()}
|
||||||
<Divider />
|
{renderSearchConfigFields()}
|
||||||
{renderScrapeListLimitFields()}
|
|
||||||
</>
|
|
||||||
)}
|
|
||||||
|
|
||||||
{renderActionNameFields() && (
|
{renderScrapeListLimitFields()}
|
||||||
<>
|
{renderActionNameFields()}
|
||||||
<Divider />
|
|
||||||
{renderActionNameFields()}
|
|
||||||
</>
|
|
||||||
)}
|
|
||||||
</>
|
</>
|
||||||
)}
|
)}
|
||||||
</Box>
|
</Box>
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
import React, { useState, useEffect } from "react";
|
import { useState, useEffect } from "react";
|
||||||
import { useTranslation } from "react-i18next";
|
import { useTranslation } from "react-i18next";
|
||||||
import { TextField, Typography, Box, Card, CardContent } from "@mui/material";
|
import { TextField, Box } from "@mui/material";
|
||||||
import { Settings, Info } from "@mui/icons-material";
|
|
||||||
import { useGlobalInfoStore } from "../../../context/globalInfo";
|
import { useGlobalInfoStore } from "../../../context/globalInfo";
|
||||||
import { getStoredRecording } from "../../../api/storage";
|
import { getStoredRecording } from "../../../api/storage";
|
||||||
import { WhereWhatPair } from "maxun-core";
|
import { WhereWhatPair } from "maxun-core";
|
||||||
@@ -16,7 +15,7 @@ interface RobotMeta {
|
|||||||
pairs: number;
|
pairs: number;
|
||||||
updatedAt: string;
|
updatedAt: string;
|
||||||
params: any[];
|
params: any[];
|
||||||
type?: 'extract' | 'scrape';
|
type?: 'extract' | 'scrape' | 'crawl' | 'search';
|
||||||
url?: string;
|
url?: string;
|
||||||
formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[];
|
formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[];
|
||||||
isLLM?: boolean;
|
isLLM?: boolean;
|
||||||
@@ -116,19 +115,11 @@ export const RobotSettingsPage = ({ handleStart }: RobotSettingsProps) => {
|
|||||||
fetchUserEmail();
|
fetchUserEmail();
|
||||||
}, [robot?.userId]);
|
}, [robot?.userId]);
|
||||||
|
|
||||||
const handleCancel = () => {
|
|
||||||
const basePath = location.pathname.includes("/prebuilt-robots")
|
|
||||||
? "/prebuilt-robots"
|
|
||||||
: "/robots";
|
|
||||||
navigate(basePath);
|
|
||||||
};
|
|
||||||
|
|
||||||
const targetUrl = getTargetUrl();
|
const targetUrl = getTargetUrl();
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<RobotConfigPage
|
<RobotConfigPage
|
||||||
title={t("robot_settings.title")}
|
title={t("robot_settings.title")}
|
||||||
onCancel={handleCancel}
|
|
||||||
cancelButtonText={t("robot_settings.buttons.close")}
|
cancelButtonText={t("robot_settings.buttons.close")}
|
||||||
showSaveButton={false}
|
showSaveButton={false}
|
||||||
showCancelButton={false}
|
showCancelButton={false}
|
||||||
@@ -137,15 +128,17 @@ export const RobotSettingsPage = ({ handleStart }: RobotSettingsProps) => {
|
|||||||
<Box style={{ display: "flex", flexDirection: "column" }}>
|
<Box style={{ display: "flex", flexDirection: "column" }}>
|
||||||
{robot && (
|
{robot && (
|
||||||
<>
|
<>
|
||||||
<TextField
|
{robot.recording_meta.type !== 'search' && (
|
||||||
label={t("robot_settings.target_url")}
|
<TextField
|
||||||
key="Robot Target URL"
|
label={t("robot_settings.target_url")}
|
||||||
value={targetUrl}
|
key="Robot Target URL"
|
||||||
InputProps={{
|
value={targetUrl}
|
||||||
readOnly: true,
|
InputProps={{
|
||||||
}}
|
readOnly: true,
|
||||||
style={{ marginBottom: "20px" }}
|
}}
|
||||||
/>
|
style={{ marginBottom: "20px" }}
|
||||||
|
/>
|
||||||
|
)}
|
||||||
<TextField
|
<TextField
|
||||||
label={t("robot_settings.robot_id")}
|
label={t("robot_settings.robot_id")}
|
||||||
key="Robot ID"
|
key="Robot ID"
|
||||||
|
|||||||
@@ -1,20 +1,20 @@
|
|||||||
import {
|
import {
|
||||||
Box,
|
Box,
|
||||||
Tabs,
|
|
||||||
Typography,
|
Typography,
|
||||||
Tab,
|
|
||||||
Paper,
|
Paper,
|
||||||
Button,
|
Button,
|
||||||
CircularProgress,
|
CircularProgress,
|
||||||
Accordion,
|
Accordion,
|
||||||
AccordionSummary,
|
AccordionSummary,
|
||||||
AccordionDetails
|
AccordionDetails,
|
||||||
|
Link
|
||||||
} from "@mui/material";
|
} from "@mui/material";
|
||||||
import * as React from "react";
|
import * as React from "react";
|
||||||
import { Data } from "./RunsTable";
|
import { Data } from "./RunsTable";
|
||||||
import { TabPanel, TabContext } from "@mui/lab";
|
import { TabPanel, TabContext } from "@mui/lab";
|
||||||
import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
|
import ExpandMoreIcon from '@mui/icons-material/ExpandMore';
|
||||||
import { useEffect, useState } from "react";
|
import { useEffect, useState } from "react";
|
||||||
|
import JSZip from "jszip";
|
||||||
import Table from '@mui/material/Table';
|
import Table from '@mui/material/Table';
|
||||||
import TableBody from '@mui/material/TableBody';
|
import TableBody from '@mui/material/TableBody';
|
||||||
import TableCell from '@mui/material/TableCell';
|
import TableCell from '@mui/material/TableCell';
|
||||||
@@ -34,6 +34,7 @@ interface RunContentProps {
|
|||||||
|
|
||||||
export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRef, abortRunHandler }: RunContentProps) => {
|
export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRef, abortRunHandler }: RunContentProps) => {
|
||||||
const { t } = useTranslation();
|
const { t } = useTranslation();
|
||||||
|
const { darkMode } = useThemeMode();
|
||||||
const [tab, setTab] = React.useState<string>('output');
|
const [tab, setTab] = React.useState<string>('output');
|
||||||
const [markdownContent, setMarkdownContent] = useState<string>('');
|
const [markdownContent, setMarkdownContent] = useState<string>('');
|
||||||
const [htmlContent, setHtmlContent] = useState<string>('');
|
const [htmlContent, setHtmlContent] = useState<string>('');
|
||||||
@@ -50,6 +51,15 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
const [listKeys, setListKeys] = useState<string[]>([]);
|
const [listKeys, setListKeys] = useState<string[]>([]);
|
||||||
const [currentListIndex, setCurrentListIndex] = useState<number>(0);
|
const [currentListIndex, setCurrentListIndex] = useState<number>(0);
|
||||||
|
|
||||||
|
const [crawlData, setCrawlData] = useState<any[][]>([]);
|
||||||
|
const [crawlColumns, setCrawlColumns] = useState<string[][]>([]);
|
||||||
|
const [crawlKeys, setCrawlKeys] = useState<string[]>([]);
|
||||||
|
const [currentCrawlIndex, setCurrentCrawlIndex] = useState<number>(0);
|
||||||
|
|
||||||
|
const [searchData, setSearchData] = useState<any[]>([]);
|
||||||
|
const [searchMode, setSearchMode] = useState<'discover' | 'scrape'>('discover');
|
||||||
|
const [currentSearchIndex, setCurrentSearchIndex] = useState<number>(0);
|
||||||
|
|
||||||
const [screenshotKeys, setScreenshotKeys] = useState<string[]>([]);
|
const [screenshotKeys, setScreenshotKeys] = useState<string[]>([]);
|
||||||
const [screenshotKeyMap, setScreenshotKeyMap] = useState<Record<string, string>>({});
|
const [screenshotKeyMap, setScreenshotKeyMap] = useState<Record<string, string>>({});
|
||||||
const [currentScreenshotIndex, setCurrentScreenshotIndex] = useState<number>(0);
|
const [currentScreenshotIndex, setCurrentScreenshotIndex] = useState<number>(0);
|
||||||
@@ -93,6 +103,10 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
setListData([]);
|
setListData([]);
|
||||||
setListColumns([]);
|
setListColumns([]);
|
||||||
setListKeys([]);
|
setListKeys([]);
|
||||||
|
setCrawlData([]);
|
||||||
|
setCrawlColumns([]);
|
||||||
|
setCrawlKeys([]);
|
||||||
|
setSearchData([]);
|
||||||
setLegacyData([]);
|
setLegacyData([]);
|
||||||
setLegacyColumns([]);
|
setLegacyColumns([]);
|
||||||
setIsLegacyData(false);
|
setIsLegacyData(false);
|
||||||
@@ -104,7 +118,7 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
|
|
||||||
const hasLegacySchema = row.serializableOutput.scrapeSchema && Array.isArray(row.serializableOutput.scrapeSchema);
|
const hasLegacySchema = row.serializableOutput.scrapeSchema && Array.isArray(row.serializableOutput.scrapeSchema);
|
||||||
const hasLegacyList = row.serializableOutput.scrapeList && Array.isArray(row.serializableOutput.scrapeList);
|
const hasLegacyList = row.serializableOutput.scrapeList && Array.isArray(row.serializableOutput.scrapeList);
|
||||||
const hasOldFormat = !row.serializableOutput.scrapeSchema && !row.serializableOutput.scrapeList && Object.keys(row.serializableOutput).length > 0;
|
const hasOldFormat = !row.serializableOutput.scrapeSchema && !row.serializableOutput.scrapeList && !row.serializableOutput.crawl && !row.serializableOutput.search && Object.keys(row.serializableOutput).length > 0;
|
||||||
|
|
||||||
if (hasLegacySchema || hasLegacyList || hasOldFormat) {
|
if (hasLegacySchema || hasLegacyList || hasOldFormat) {
|
||||||
processLegacyData(row.serializableOutput);
|
processLegacyData(row.serializableOutput);
|
||||||
@@ -121,6 +135,14 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
if (row.serializableOutput.scrapeList) {
|
if (row.serializableOutput.scrapeList) {
|
||||||
processScrapeList(row.serializableOutput.scrapeList);
|
processScrapeList(row.serializableOutput.scrapeList);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (row.serializableOutput.crawl) {
|
||||||
|
processCrawl(row.serializableOutput.crawl);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (row.serializableOutput.search) {
|
||||||
|
processSearch(row.serializableOutput.search);
|
||||||
|
}
|
||||||
}, [row.serializableOutput, row.status]);
|
}, [row.serializableOutput, row.status]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
@@ -139,7 +161,6 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
let normalizedScreenshotKeys: string[];
|
let normalizedScreenshotKeys: string[];
|
||||||
|
|
||||||
if (isLegacyPattern) {
|
if (isLegacyPattern) {
|
||||||
// Legacy unnamed screenshots → Screenshot 1, Screenshot 2...
|
|
||||||
normalizedScreenshotKeys = rawKeys.map((_, index) => `Screenshot ${index + 1}`);
|
normalizedScreenshotKeys = rawKeys.map((_, index) => `Screenshot ${index + 1}`);
|
||||||
} else {
|
} else {
|
||||||
normalizedScreenshotKeys = rawKeys.map((key, index) => {
|
normalizedScreenshotKeys = rawKeys.map((key, index) => {
|
||||||
@@ -342,6 +363,76 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
setCurrentListIndex(0);
|
setCurrentListIndex(0);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Turns the `crawl` section of a run's output into the crawl-related state.
 *
 * For every own key of `crawlDataInput` whose value is a non-empty array, the
 * entries are filtered down to objects that carry at least one value other
 * than `undefined` or `""`. Each surviving table is stored together with the
 * union of its entries' property names and a display key.
 *
 * Writes `crawlData`, `crawlColumns`, `crawlKeys` and resets
 * `currentCrawlIndex` to 0. Non-object (or `null`) input yields empty state.
 *
 * @param crawlDataInput - Value of `serializableOutput.crawl`.
 */
const processCrawl = (crawlDataInput: any) => {
  const tablesList: any[][] = [];
  const columnsList: string[][] = [];
  const keys: string[] = [];

  // `typeof null` is also 'object', so null has to be ruled out explicitly
  // before enumerating the input's keys.
  if (crawlDataInput !== null && typeof crawlDataInput === 'object') {
    Object.keys(crawlDataInput).forEach((crawlKey) => {
      const tableData = crawlDataInput[crawlKey];
      if (!Array.isArray(tableData) || tableData.length === 0) {
        return;
      }

      // Keep only object entries that hold at least one meaningful value.
      const filteredData = tableData.filter((entry: any) =>
        entry && typeof entry === 'object' && Object.values(entry).some(value => value !== undefined && value !== "")
      );
      if (filteredData.length === 0) {
        return;
      }

      tablesList.push(filteredData);
      keys.push(crawlKey);

      // Column list = union of property names across all kept entries.
      const tableColumns = new Set<string>();
      filteredData.forEach((entry: any) => {
        Object.keys(entry).forEach(column => tableColumns.add(column));
      });
      columnsList.push(Array.from(tableColumns));
    });
  }

  setCrawlData(tablesList);
  setCrawlColumns(columnsList);

  // Empty keys and keys containing "crawl" are replaced by "Crawl <n>".
  const normalizedCrawlKeys = keys.map((crawlKey, index) => {
    if (!crawlKey || crawlKey.toLowerCase().includes("crawl")) {
      return `Crawl ${index + 1}`;
    }
    return crawlKey;
  });

  setCrawlKeys(normalizedCrawlKeys);
  setCurrentCrawlIndex(0);
};
|
||||||
|
|
||||||
|
/**
 * Turns the `search` section of a run's output into the search-related state.
 *
 * Only the first own key of `searchDataInput` is read. Its value must expose
 * a `results` array; otherwise nothing is written.
 *
 * - In `'scrape'` mode the results are stored unchanged.
 * - In any other mode (treated as `'discover'`) each result is reduced to
 *   `title`, `url`, `description` and `position`, with `'-'` standing in for
 *   missing text and the 1-based index for a missing position.
 *
 * Writes `searchMode`, `searchData` and resets `currentSearchIndex` to 0.
 *
 * @param searchDataInput - Value of `serializableOutput.search`.
 */
const processSearch = (searchDataInput: any) => {
  // `typeof null` is also 'object', so null has to be ruled out explicitly.
  if (searchDataInput === null || typeof searchDataInput !== 'object') {
    return;
  }

  const keys = Object.keys(searchDataInput);
  if (keys.length === 0) {
    return;
  }

  const searchInfo = searchDataInput[keys[0]];
  if (!searchInfo || !Array.isArray(searchInfo.results)) {
    return;
  }

  // Anything other than an explicit 'scrape' is handled as 'discover', which
  // keeps the stored mode inside the state's declared union.
  const mode: 'discover' | 'scrape' = searchInfo.mode === 'scrape' ? 'scrape' : 'discover';
  setSearchMode(mode);

  if (mode === 'scrape') {
    setSearchData(searchInfo.results);
  } else {
    // Optional chaining tolerates null/undefined entries in the results list.
    const normalizedResults = searchInfo.results.map((result: any, index: number) => ({
      title: result?.title || '-',
      url: result?.url || '-',
      description: result?.description || '-',
      position: result?.position || index + 1,
    }));
    setSearchData(normalizedResults);
  }

  setCurrentSearchIndex(0);
};
|
||||||
|
|
||||||
const convertToCSV = (data: any[], columns: string[], isSchemaData: boolean = false, isTabular: boolean = false): string => {
|
const convertToCSV = (data: any[], columns: string[], isSchemaData: boolean = false, isTabular: boolean = false): string => {
|
||||||
if (isSchemaData && !isTabular && data.length === 1) {
|
if (isSchemaData && !isTabular && data.length === 1) {
|
||||||
const header = 'Label,Value';
|
const header = 'Label,Value';
|
||||||
@@ -362,7 +453,6 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Function to download a specific dataset as CSV
|
|
||||||
const downloadCSV = (data: any[], columns: string[], filename: string, isSchemaData: boolean = false, isTabular: boolean = false) => {
|
const downloadCSV = (data: any[], columns: string[], filename: string, isSchemaData: boolean = false, isTabular: boolean = false) => {
|
||||||
const csvContent = convertToCSV(data, columns, isSchemaData, isTabular);
|
const csvContent = convertToCSV(data, columns, isSchemaData, isTabular);
|
||||||
const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
|
const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
|
||||||
@@ -413,6 +503,33 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
}, 100);
|
}, 100);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Packs every crawled page into a ZIP archive (one pretty-printed JSON file
 * per page) and triggers a browser download of it.
 *
 * File names are derived from each page's `metadata.url` (or `url`): the
 * scheme is dropped and every character outside `[a-zA-Z0-9_.-]` becomes `_`.
 * Pages without a usable URL are named `crawl_url_<n>.json`. When two pages
 * map to the same name, later ones get a `_2`, `_3`, ... suffix so that no
 * entry overwrites another inside the archive.
 *
 * @param crawlDataArray - Crawled page objects to export.
 * @param zipFilename - File name offered for the downloaded archive.
 */
const downloadAllCrawlsAsZip = async (crawlDataArray: any[], zipFilename: string) => {
  const zip = new JSZip();
  // Entry names already added to the archive, used to avoid collisions.
  const usedNames = new Set<string>();

  crawlDataArray.forEach((item, index) => {
    const rawUrl = item?.metadata?.url || item?.url || '';
    // Page data is untyped; only string URLs can be turned into a file name.
    const pageUrl: string = typeof rawUrl === 'string' ? rawUrl : '';
    const baseName = pageUrl
      ? pageUrl.replace(/^https?:\/\//, '').replace(/\//g, '_').replace(/[^a-zA-Z0-9_.-]/g, '_')
      : `crawl_url_${index + 1}`;

    // Different URLs can sanitise to the same name (http vs https, '/' vs '_');
    // adding an entry under an existing name would replace the earlier page.
    let filename = `${baseName}.json`;
    for (let suffix = 2; usedNames.has(filename); suffix++) {
      filename = `${baseName}_${suffix}.json`;
    }
    usedNames.add(filename);

    zip.file(filename, JSON.stringify(item, null, 2));
  });

  const blob = await zip.generateAsync({ type: 'blob' });
  const objectUrl = URL.createObjectURL(blob);

  // A temporary anchor element is clicked programmatically to start the download.
  const link = document.createElement("a");
  link.href = objectUrl;
  link.setAttribute("download", zipFilename);
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);

  // Release the object URL shortly after the click has been dispatched.
  setTimeout(() => {
    URL.revokeObjectURL(objectUrl);
  }, 100);
};
|
||||||
|
|
||||||
const renderDataTable = (
|
const renderDataTable = (
|
||||||
data: any[],
|
data: any[],
|
||||||
@@ -420,14 +537,13 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
title: string,
|
title: string,
|
||||||
csvFilename: string,
|
csvFilename: string,
|
||||||
jsonFilename: string,
|
jsonFilename: string,
|
||||||
isPaginatedList: boolean = false,
|
|
||||||
isSchemaData: boolean = false
|
isSchemaData: boolean = false
|
||||||
) => {
|
) => {
|
||||||
if (data.length === 0) return null;
|
if (data.length === 0) return null;
|
||||||
|
|
||||||
const shouldShowAsKeyValue = isSchemaData && !isSchemaTabular && data.length === 1;
|
const shouldShowAsKeyValue = isSchemaData && !isSchemaTabular && data.length === 1;
|
||||||
|
|
||||||
if (title === '') {
|
if (!title || title.trim() === '') {
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
<Box sx={{ mb: 2 }}>
|
<Box sx={{ mb: 2 }}>
|
||||||
@@ -673,7 +789,7 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
const hasData = schemaData.length > 0 || listData.length > 0 || legacyData.length > 0;
|
const hasData = schemaData.length > 0 || listData.length > 0 || crawlData.length > 0 || searchData.length > 0 || legacyData.length > 0;
|
||||||
const hasScreenshots = row.binaryOutput && Object.keys(row.binaryOutput).length > 0;
|
const hasScreenshots = row.binaryOutput && Object.keys(row.binaryOutput).length > 0;
|
||||||
const hasMarkdown = markdownContent.length > 0;
|
const hasMarkdown = markdownContent.length > 0;
|
||||||
const hasHTML = htmlContent.length > 0;
|
const hasHTML = htmlContent.length > 0;
|
||||||
@@ -805,7 +921,6 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
)}
|
)}
|
||||||
</>
|
</>
|
||||||
) : (
|
) : (
|
||||||
// Extract robot output
|
|
||||||
<>
|
<>
|
||||||
{row.status === 'running' || row.status === 'queued' ? (
|
{row.status === 'running' || row.status === 'queued' ? (
|
||||||
<>
|
<>
|
||||||
@@ -884,7 +999,6 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
'',
|
'',
|
||||||
`${schemaKeys[currentSchemaIndex] || 'schema_data'}.csv`,
|
`${schemaKeys[currentSchemaIndex] || 'schema_data'}.csv`,
|
||||||
`${schemaKeys[currentSchemaIndex] || 'schema_data'}.json`,
|
`${schemaKeys[currentSchemaIndex] || 'schema_data'}.json`,
|
||||||
false,
|
|
||||||
true
|
true
|
||||||
)}
|
)}
|
||||||
</AccordionDetails>
|
</AccordionDetails>
|
||||||
@@ -1033,6 +1147,588 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
|
|||||||
)}
|
)}
|
||||||
</>
|
</>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
{crawlData.length > 0 && crawlData[0] && crawlData[0].length > 0 && (
|
||||||
|
<Accordion defaultExpanded sx={{ mb: 2 }}>
|
||||||
|
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||||
|
<Box sx={{ display: 'flex', alignItems: 'center' }}>
|
||||||
|
<Typography variant='h6'>
|
||||||
|
Crawl Results
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</AccordionSummary>
|
||||||
|
<AccordionDetails>
|
||||||
|
<Box
|
||||||
|
sx={{
|
||||||
|
display: 'flex',
|
||||||
|
overflowX: 'auto',
|
||||||
|
borderBottom: '1px solid',
|
||||||
|
borderColor: darkMode ? '#2a3441' : '#dee2e6',
|
||||||
|
mb: 2,
|
||||||
|
'&::-webkit-scrollbar': {
|
||||||
|
height: '8px',
|
||||||
|
},
|
||||||
|
'&::-webkit-scrollbar-track': {
|
||||||
|
backgroundColor: darkMode ? '#1e1e1e' : '#f1f1f1',
|
||||||
|
},
|
||||||
|
'&::-webkit-scrollbar-thumb': {
|
||||||
|
backgroundColor: darkMode ? '#555' : '#888',
|
||||||
|
borderRadius: '4px',
|
||||||
|
},
|
||||||
|
'&::-webkit-scrollbar-thumb:hover': {
|
||||||
|
backgroundColor: '#FF00C3',
|
||||||
|
},
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{crawlData[0].map((item: any, idx: number) => {
|
||||||
|
const url = item?.metadata?.url || item?.url || `URL ${idx + 1}`;
|
||||||
|
|
||||||
|
return (
|
||||||
|
<Box
|
||||||
|
key={idx}
|
||||||
|
onClick={() => setCurrentCrawlIndex(idx)}
|
||||||
|
sx={{
|
||||||
|
px: 2,
|
||||||
|
py: 1,
|
||||||
|
cursor: 'pointer',
|
||||||
|
backgroundColor: currentCrawlIndex === idx
|
||||||
|
? darkMode ? '#121111ff' : '#e9ecef'
|
||||||
|
: 'transparent',
|
||||||
|
borderBottom: currentCrawlIndex === idx ? '3px solid #FF00C3' : 'none',
|
||||||
|
color: darkMode ? '#fff' : '#000',
|
||||||
|
whiteSpace: 'nowrap',
|
||||||
|
fontSize: '0.875rem',
|
||||||
|
flexShrink: 0,
|
||||||
|
}}
|
||||||
|
title={url}
|
||||||
|
>
|
||||||
|
Link {idx + 1}
|
||||||
|
</Box>
|
||||||
|
);
|
||||||
|
})}
|
||||||
|
</Box>
|
||||||
|
|
||||||
|
{crawlData[0][currentCrawlIndex] && (
|
||||||
|
<>
|
||||||
|
<Accordion defaultExpanded sx={{ mb: 2 }}>
|
||||||
|
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||||
|
<Box sx={{ display: 'flex', alignItems: 'center' }}>
|
||||||
|
<Typography variant='h6'>
|
||||||
|
Metadata
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</AccordionSummary>
|
||||||
|
<AccordionDetails>
|
||||||
|
<TableContainer component={Paper} sx={{ maxHeight: 300 }}>
|
||||||
|
<Table size="small">
|
||||||
|
<TableBody>
|
||||||
|
{crawlData[0][currentCrawlIndex].metadata &&
|
||||||
|
Object.entries(crawlData[0][currentCrawlIndex].metadata).map(([key, value]: [string, any]) => (
|
||||||
|
<TableRow key={key}>
|
||||||
|
<TableCell sx={{ fontWeight: 500, width: '200px' }}>
|
||||||
|
{key}
|
||||||
|
</TableCell>
|
||||||
|
<TableCell sx={{ wordBreak: 'break-word' }}>
|
||||||
|
{value === undefined || value === '' ? '-' : String(value)}
|
||||||
|
</TableCell>
|
||||||
|
</TableRow>
|
||||||
|
))
|
||||||
|
}
|
||||||
|
</TableBody>
|
||||||
|
</Table>
|
||||||
|
</TableContainer>
|
||||||
|
</AccordionDetails>
|
||||||
|
</Accordion>
|
||||||
|
|
||||||
|
{crawlData[0][currentCrawlIndex].text && (
|
||||||
|
<Accordion defaultExpanded sx={{ mb: 2 }}>
|
||||||
|
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||||
|
<Box sx={{ display: 'flex', alignItems: 'center' }}>
|
||||||
|
<Typography variant='h6'>
|
||||||
|
Text Content
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</AccordionSummary>
|
||||||
|
<AccordionDetails>
|
||||||
|
<Paper
|
||||||
|
sx={{
|
||||||
|
p: 2,
|
||||||
|
maxHeight: '300px',
|
||||||
|
overflow: 'auto',
|
||||||
|
backgroundColor: darkMode ? '#1e1e1e' : '#f5f5f5'
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<Typography
|
||||||
|
component="pre"
|
||||||
|
sx={{
|
||||||
|
whiteSpace: 'pre-wrap',
|
||||||
|
wordBreak: 'break-word',
|
||||||
|
fontFamily: 'monospace',
|
||||||
|
fontSize: '0.75rem',
|
||||||
|
m: 0
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{crawlData[0][currentCrawlIndex].text}
|
||||||
|
</Typography>
|
||||||
|
</Paper>
|
||||||
|
</AccordionDetails>
|
||||||
|
</Accordion>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{crawlData[0][currentCrawlIndex].html && (
|
||||||
|
<Accordion defaultExpanded sx={{ mb: 2 }}>
|
||||||
|
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||||
|
<Box sx={{ display: 'flex', alignItems: 'center' }}>
|
||||||
|
<Typography variant='h6'>
|
||||||
|
HTML
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</AccordionSummary>
|
||||||
|
<AccordionDetails>
|
||||||
|
<Paper
|
||||||
|
sx={{
|
||||||
|
p: 2,
|
||||||
|
maxHeight: '300px',
|
||||||
|
overflow: 'auto',
|
||||||
|
backgroundColor: darkMode ? '#1e1e1e' : '#f5f5f5'
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<Typography
|
||||||
|
component="pre"
|
||||||
|
sx={{
|
||||||
|
whiteSpace: 'pre-wrap',
|
||||||
|
wordBreak: 'break-word',
|
||||||
|
fontFamily: 'monospace',
|
||||||
|
fontSize: '0.65rem',
|
||||||
|
m: 0
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{crawlData[0][currentCrawlIndex].html}
|
||||||
|
</Typography>
|
||||||
|
</Paper>
|
||||||
|
</AccordionDetails>
|
||||||
|
</Accordion>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{crawlData[0][currentCrawlIndex].links && crawlData[0][currentCrawlIndex].links.length > 0 && (
|
||||||
|
<Accordion defaultExpanded sx={{ mb: 2 }}>
|
||||||
|
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||||
|
<Box sx={{ display: 'flex', alignItems: 'center' }}>
|
||||||
|
<Typography variant='h6'>
|
||||||
|
Links ({crawlData[0][currentCrawlIndex].links.length})
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</AccordionSummary>
|
||||||
|
<AccordionDetails>
|
||||||
|
<Paper sx={{ maxHeight: 200, overflow: 'auto', p: 1 }}>
|
||||||
|
{crawlData[0][currentCrawlIndex].links.map((link: string, idx: number) => (
|
||||||
|
<Typography key={idx} sx={{ fontSize: '0.75rem', mb: 0.5 }}>
|
||||||
|
{link}
|
||||||
|
</Typography>
|
||||||
|
))}
|
||||||
|
</Paper>
|
||||||
|
</AccordionDetails>
|
||||||
|
</Accordion>
|
||||||
|
)}
|
||||||
|
|
||||||
|
<Box sx={{ display: 'flex', gap: 2, mt: 2 }}>
|
||||||
|
<Button
|
||||||
|
onClick={() => {
|
||||||
|
const currentUrl = crawlData[0][currentCrawlIndex]?.metadata?.url || crawlData[0][currentCrawlIndex]?.url || '';
|
||||||
|
const filename = currentUrl
|
||||||
|
? currentUrl.replace(/^https?:\/\//, '').replace(/\//g, '_').replace(/[^a-zA-Z0-9_.-]/g, '_') + '.json'
|
||||||
|
: `crawl_url_${currentCrawlIndex + 1}.json`;
|
||||||
|
downloadJSON(
|
||||||
|
[crawlData[0][currentCrawlIndex]],
|
||||||
|
filename
|
||||||
|
);
|
||||||
|
}}
|
||||||
|
sx={{
|
||||||
|
color: '#FF00C3',
|
||||||
|
textTransform: 'none',
|
||||||
|
p: 0,
|
||||||
|
minWidth: 'auto',
|
||||||
|
backgroundColor: 'transparent',
|
||||||
|
'&:hover': {
|
||||||
|
backgroundColor: 'transparent',
|
||||||
|
textDecoration: 'underline',
|
||||||
|
},
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
Download This Page as JSON
|
||||||
|
</Button>
|
||||||
|
|
||||||
|
<Button
|
||||||
|
onClick={() => {
|
||||||
|
const firstUrl = crawlData[0][0]?.metadata?.url || crawlData[0][0]?.url || '';
|
||||||
|
const baseFilename = firstUrl
|
||||||
|
? firstUrl.replace(/^https?:\/\//, '').split('/')[0].replace(/[^a-zA-Z0-9_.-]/g, '_')
|
||||||
|
: 'crawl';
|
||||||
|
downloadAllCrawlsAsZip(
|
||||||
|
crawlData[0],
|
||||||
|
`${baseFilename}_all_urls.zip`
|
||||||
|
);
|
||||||
|
}}
|
||||||
|
sx={{
|
||||||
|
color: '#FF00C3',
|
||||||
|
textTransform: 'none',
|
||||||
|
p: 0,
|
||||||
|
minWidth: 'auto',
|
||||||
|
backgroundColor: 'transparent',
|
||||||
|
'&:hover': {
|
||||||
|
backgroundColor: 'transparent',
|
||||||
|
textDecoration: 'underline',
|
||||||
|
},
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
Download All Pages as JSON
|
||||||
|
</Button>
|
||||||
|
</Box>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</AccordionDetails>
|
||||||
|
</Accordion>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{searchData.length > 0 && (
|
||||||
|
<Accordion defaultExpanded sx={{ mb: 2 }}>
|
||||||
|
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||||
|
<Box sx={{ display: 'flex', alignItems: 'center' }}>
|
||||||
|
<Typography variant='h6'>
|
||||||
|
Search Results
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</AccordionSummary>
|
||||||
|
<AccordionDetails>
|
||||||
|
{searchMode === 'scrape' && searchData.length > 0 ? (
|
||||||
|
<>
|
||||||
|
<Box
|
||||||
|
sx={{
|
||||||
|
display: 'flex',
|
||||||
|
overflowX: 'auto',
|
||||||
|
borderBottom: '1px solid',
|
||||||
|
borderColor: darkMode ? '#2a3441' : '#dee2e6',
|
||||||
|
mb: 2,
|
||||||
|
'&::-webkit-scrollbar': {
|
||||||
|
height: '8px',
|
||||||
|
},
|
||||||
|
'&::-webkit-scrollbar-track': {
|
||||||
|
backgroundColor: darkMode ? '#1e1e1e' : '#f1f1f1',
|
||||||
|
},
|
||||||
|
'&::-webkit-scrollbar-thumb': {
|
||||||
|
backgroundColor: darkMode ? '#555' : '#888',
|
||||||
|
borderRadius: '4px',
|
||||||
|
},
|
||||||
|
'&::-webkit-scrollbar-thumb:hover': {
|
||||||
|
backgroundColor: '#FF00C3',
|
||||||
|
},
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{searchData.map((item: any, idx: number) => {
|
||||||
|
const url = item?.metadata?.url || item?.url || `Result ${idx + 1}`;
|
||||||
|
|
||||||
|
return (
|
||||||
|
<Box
|
||||||
|
key={idx}
|
||||||
|
onClick={() => setCurrentSearchIndex(idx)}
|
||||||
|
sx={{
|
||||||
|
px: 2,
|
||||||
|
py: 1,
|
||||||
|
cursor: 'pointer',
|
||||||
|
backgroundColor: currentSearchIndex === idx
|
||||||
|
? darkMode ? '#121111ff' : '#e9ecef'
|
||||||
|
: 'transparent',
|
||||||
|
borderBottom: currentSearchIndex === idx ? '3px solid #FF00C3' : 'none',
|
||||||
|
color: darkMode ? '#fff' : '#000',
|
||||||
|
whiteSpace: 'nowrap',
|
||||||
|
fontSize: '0.875rem',
|
||||||
|
flexShrink: 0,
|
||||||
|
}}
|
||||||
|
title={url}
|
||||||
|
>
|
||||||
|
Link {idx + 1}
|
||||||
|
</Box>
|
||||||
|
);
|
||||||
|
})}
|
||||||
|
</Box>
|
||||||
|
|
||||||
|
{searchData[currentSearchIndex] && (
|
||||||
|
<>
|
||||||
|
<Accordion defaultExpanded sx={{ mb: 2 }}>
|
||||||
|
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||||
|
<Box sx={{ display: 'flex', alignItems: 'center' }}>
|
||||||
|
<Typography variant='h6'>
|
||||||
|
Metadata
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</AccordionSummary>
|
||||||
|
<AccordionDetails>
|
||||||
|
<TableContainer component={Paper} sx={{ maxHeight: 300 }}>
|
||||||
|
<Table size="small">
|
||||||
|
<TableBody>
|
||||||
|
{searchData[currentSearchIndex].metadata &&
|
||||||
|
Object.entries(searchData[currentSearchIndex].metadata).map(([key, value]: [string, any]) => (
|
||||||
|
<TableRow key={key}>
|
||||||
|
<TableCell sx={{ fontWeight: 500, width: '200px' }}>
|
||||||
|
{key}
|
||||||
|
</TableCell>
|
||||||
|
<TableCell sx={{ wordBreak: 'break-word' }}>
|
||||||
|
{value === undefined || value === '' ? '-' : String(value)}
|
||||||
|
</TableCell>
|
||||||
|
</TableRow>
|
||||||
|
))
|
||||||
|
}
|
||||||
|
</TableBody>
|
||||||
|
</Table>
|
||||||
|
</TableContainer>
|
||||||
|
</AccordionDetails>
|
||||||
|
</Accordion>
|
||||||
|
|
||||||
|
{searchData[currentSearchIndex].text && (
|
||||||
|
<Accordion defaultExpanded sx={{ mb: 2 }}>
|
||||||
|
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||||
|
<Box sx={{ display: 'flex', alignItems: 'center' }}>
|
||||||
|
<Typography variant='h6'>
|
||||||
|
Text Content
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</AccordionSummary>
|
||||||
|
<AccordionDetails>
|
||||||
|
<Paper
|
||||||
|
sx={{
|
||||||
|
p: 2,
|
||||||
|
maxHeight: '300px',
|
||||||
|
overflow: 'auto',
|
||||||
|
backgroundColor: darkMode ? '#1e1e1e' : '#f5f5f5'
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<Typography
|
||||||
|
component="pre"
|
||||||
|
sx={{
|
||||||
|
whiteSpace: 'pre-wrap',
|
||||||
|
wordBreak: 'break-word',
|
||||||
|
fontFamily: 'monospace',
|
||||||
|
fontSize: '0.75rem',
|
||||||
|
m: 0
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{searchData[currentSearchIndex].text}
|
||||||
|
</Typography>
|
||||||
|
</Paper>
|
||||||
|
</AccordionDetails>
|
||||||
|
</Accordion>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{searchData[currentSearchIndex].html && (
|
||||||
|
<Accordion defaultExpanded sx={{ mb: 2 }}>
|
||||||
|
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||||
|
<Box sx={{ display: 'flex', alignItems: 'center' }}>
|
||||||
|
<Typography variant='h6'>
|
||||||
|
HTML
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</AccordionSummary>
|
||||||
|
<AccordionDetails>
|
||||||
|
<Paper
|
||||||
|
sx={{
|
||||||
|
p: 2,
|
||||||
|
maxHeight: '300px',
|
||||||
|
overflow: 'auto',
|
||||||
|
backgroundColor: darkMode ? '#1e1e1e' : '#f5f5f5'
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<Typography
|
||||||
|
component="pre"
|
||||||
|
sx={{
|
||||||
|
whiteSpace: 'pre-wrap',
|
||||||
|
wordBreak: 'break-word',
|
||||||
|
fontFamily: 'monospace',
|
||||||
|
fontSize: '0.75rem',
|
||||||
|
m: 0
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{searchData[currentSearchIndex].html}
|
||||||
|
</Typography>
|
||||||
|
</Paper>
|
||||||
|
</AccordionDetails>
|
||||||
|
</Accordion>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{searchData[currentSearchIndex].markdown && (
|
||||||
|
<Accordion defaultExpanded sx={{ mb: 2 }}>
|
||||||
|
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||||
|
<Box sx={{ display: 'flex', alignItems: 'center' }}>
|
||||||
|
<Typography variant='h6'>
|
||||||
|
Markdown
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</AccordionSummary>
|
||||||
|
<AccordionDetails>
|
||||||
|
<Paper
|
||||||
|
sx={{
|
||||||
|
p: 2,
|
||||||
|
maxHeight: '300px',
|
||||||
|
overflow: 'auto',
|
||||||
|
backgroundColor: darkMode ? '#1e1e1e' : '#f5f5f5'
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<Typography
|
||||||
|
component="pre"
|
||||||
|
sx={{
|
||||||
|
whiteSpace: 'pre-wrap',
|
||||||
|
wordBreak: 'break-word',
|
||||||
|
fontFamily: 'monospace',
|
||||||
|
fontSize: '0.75rem',
|
||||||
|
m: 0
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{searchData[currentSearchIndex].markdown}
|
||||||
|
</Typography>
|
||||||
|
</Paper>
|
||||||
|
</AccordionDetails>
|
||||||
|
</Accordion>
|
||||||
|
)}
|
||||||
|
|
||||||
|
{searchData[currentSearchIndex].links && searchData[currentSearchIndex].links.length > 0 && (
|
||||||
|
<Accordion defaultExpanded sx={{ mb: 2 }}>
|
||||||
|
<AccordionSummary expandIcon={<ExpandMoreIcon />}>
|
||||||
|
<Box sx={{ display: 'flex', alignItems: 'center' }}>
|
||||||
|
<Typography variant='h6'>
|
||||||
|
Links ({searchData[currentSearchIndex].links.length})
|
||||||
|
</Typography>
|
||||||
|
</Box>
|
||||||
|
</AccordionSummary>
|
||||||
|
<AccordionDetails>
|
||||||
|
<Paper sx={{ maxHeight: 200, overflow: 'auto', p: 1 }}>
|
||||||
|
{searchData[currentSearchIndex].links.map((link: string, idx: number) => (
|
||||||
|
<Typography key={idx} sx={{ fontSize: '0.75rem', mb: 0.5 }}>
|
||||||
|
{link}
|
||||||
|
</Typography>
|
||||||
|
))}
|
||||||
|
</Paper>
|
||||||
|
</AccordionDetails>
|
||||||
|
</Accordion>
|
||||||
|
)}
|
||||||
|
|
||||||
|
<Box sx={{ display: 'flex', alignItems: 'center', mt: 2 }}>
|
||||||
|
<Button
|
||||||
|
onClick={() => {
|
||||||
|
const result = searchData[currentSearchIndex];
|
||||||
|
const filename = `search_result_${currentSearchIndex + 1}.json`;
|
||||||
|
downloadJSON(result, filename);
|
||||||
|
}}
|
||||||
|
sx={{
|
||||||
|
color: '#FF00C3',
|
||||||
|
textTransform: 'none',
|
||||||
|
mr: 2,
|
||||||
|
p: 0,
|
||||||
|
minWidth: 'auto',
|
||||||
|
backgroundColor: 'transparent',
|
||||||
|
'&:hover': {
|
||||||
|
backgroundColor: 'transparent',
|
||||||
|
textDecoration: 'underline',
|
||||||
|
},
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
Download as JSON
|
||||||
|
</Button>
|
||||||
|
</Box>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</>
|
||||||
|
) : (
|
||||||
|
<>
|
||||||
|
<TableContainer component={Paper} sx={{ maxHeight: 600 }}>
|
||||||
|
<Table stickyHeader aria-label="search-results-table">
|
||||||
|
<TableHead>
|
||||||
|
<TableRow>
|
||||||
|
<TableCell
|
||||||
|
sx={{
|
||||||
|
backgroundColor: darkMode ? '#11111' : '#f8f9fa',
|
||||||
|
minWidth: '200px'
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
Title
|
||||||
|
</TableCell>
|
||||||
|
<TableCell
|
||||||
|
sx={{
|
||||||
|
backgroundColor: darkMode ? '#11111' : '#f8f9fa',
|
||||||
|
minWidth: '250px'
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
URL
|
||||||
|
</TableCell>
|
||||||
|
<TableCell
|
||||||
|
sx={{
|
||||||
|
backgroundColor: darkMode ? '#11111' : '#f8f9fa',
|
||||||
|
minWidth: '300px'
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
Description
|
||||||
|
</TableCell>
|
||||||
|
</TableRow>
|
||||||
|
</TableHead>
|
||||||
|
|
||||||
|
<TableBody>
|
||||||
|
{searchData.map((result: any, idx: number) => (
|
||||||
|
<TableRow key={idx}>
|
||||||
|
<TableCell>
|
||||||
|
{result.title || '-'}
|
||||||
|
</TableCell>
|
||||||
|
<TableCell>
|
||||||
|
{result.url ? (
|
||||||
|
<Link
|
||||||
|
href={result.url}
|
||||||
|
target="_blank"
|
||||||
|
rel="noopener noreferrer"
|
||||||
|
sx={{
|
||||||
|
color: '#FF00C3',
|
||||||
|
textDecoration: 'none',
|
||||||
|
'&:hover': {
|
||||||
|
textDecoration: 'underline'
|
||||||
|
},
|
||||||
|
wordBreak: 'break-all'
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
{result.url}
|
||||||
|
</Link>
|
||||||
|
) : '-'}
|
||||||
|
</TableCell>
|
||||||
|
<TableCell>
|
||||||
|
{result.description || '-'}
|
||||||
|
</TableCell>
|
||||||
|
</TableRow>
|
||||||
|
))}
|
||||||
|
</TableBody>
|
||||||
|
</Table>
|
||||||
|
</TableContainer>
|
||||||
|
|
||||||
|
<Box sx={{ display: 'flex', alignItems: 'center', mt: 2 }}>
|
||||||
|
<Button
|
||||||
|
onClick={() => {
|
||||||
|
downloadJSON(searchData, 'search_results.json');
|
||||||
|
}}
|
||||||
|
sx={{
|
||||||
|
color: '#FF00C3',
|
||||||
|
textTransform: 'none',
|
||||||
|
mr: 2,
|
||||||
|
p: 0,
|
||||||
|
minWidth: 'auto',
|
||||||
|
backgroundColor: 'transparent',
|
||||||
|
'&:hover': {
|
||||||
|
backgroundColor: 'transparent',
|
||||||
|
textDecoration: 'underline',
|
||||||
|
},
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
Download as JSON
|
||||||
|
</Button>
|
||||||
|
</Box>
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</AccordionDetails>
|
||||||
|
</Accordion>
|
||||||
|
)}
|
||||||
</Box>
|
</Box>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
|
|||||||
@@ -56,6 +56,7 @@ export interface Data {
|
|||||||
runByScheduleId?: string;
|
runByScheduleId?: string;
|
||||||
browserId: string;
|
browserId: string;
|
||||||
runByAPI?: boolean;
|
runByAPI?: boolean;
|
||||||
|
runBySDK?: boolean;
|
||||||
log: string;
|
log: string;
|
||||||
runId: string;
|
runId: string;
|
||||||
robotId: string;
|
robotId: string;
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ interface RobotMeta {
|
|||||||
pairs: number;
|
pairs: number;
|
||||||
updatedAt: string;
|
updatedAt: string;
|
||||||
params: any[];
|
params: any[];
|
||||||
type?: 'extract' | 'scrape';
|
type?: 'extract' | 'scrape' | 'crawl' | 'search';
|
||||||
url?: string;
|
url?: string;
|
||||||
formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[];
|
formats?: ('markdown' | 'html' | 'screenshot-visible' | 'screenshot-fullpage')[];
|
||||||
isLLM?: boolean;
|
isLLM?: boolean;
|
||||||
|
|||||||
Reference in New Issue
Block a user