Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 168 additions & 0 deletions apps/sim/app/api/tools/brightdata/dataset/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import { randomUUID } from 'crypto'
import { createLogger } from '@sim/logger'
import { NextResponse } from 'next/server'

// Logger for the Bright Data dataset route; the handler prefixes every entry with a short request ID.
const logger = createLogger('BrightDataDatasetAPI')

// Exported route-level duration setting. NOTE(review): presumably the Next.js route segment
// `maxDuration` option (seconds) — the handler below polls about once per second, so confirm
// the polling budget stays within this limit.
export const maxDuration = 600

/**
 * Parses an upstream response body as JSON, returning the raw text unchanged when it is not
 * valid JSON.
 */
function parseDatasetResponse(text: string): unknown {
  try {
    return JSON.parse(text)
  } catch {
    return text
  }
}

/**
 * Reads `key` from `payload` when the payload is a non-null object containing that key;
 * returns `undefined` otherwise.
 */
function readDatasetField(payload: unknown, key: string): unknown {
  if (typeof payload !== 'object' || payload === null || !(key in payload)) {
    return undefined
  }
  return (payload as Record<string, unknown>)[key]
}

/**
 * Builds a readable error message from an upstream payload's `error` field.
 * Structured (object) errors are serialized as JSON instead of collapsing to
 * `[object Object]`; a missing or null `error` yields `fallback`.
 */
function describeDatasetError(payload: unknown, fallback: string): string {
  const detail = readDatasetField(payload, 'error')
  if (detail === undefined || detail === null) return fallback
  if (typeof detail === 'string') return detail
  if (typeof detail === 'object') return JSON.stringify(detail)
  return String(detail)
}

/**
 * Triggers a Bright Data dataset collection and polls until its snapshot is available.
 *
 * Expects a JSON object body with string `datasetId` and `apiToken`; every other body field
 * is forwarded to the trigger endpoint as the single input record.
 *
 * Responses:
 * - 200 `{ data, snapshot_at }` once the snapshot endpoint returns a non-pending payload.
 * - 400 when the body is not valid JSON or a required field is missing.
 * - The upstream status code when the trigger call fails or the snapshot call returns 400.
 * - 500 when the trigger response has no snapshot ID or an unexpected error is thrown.
 * - 504 when the snapshot is still pending after the polling budget is exhausted.
 */
export async function POST(request: Request) {
  const requestId = randomUUID().slice(0, 8)
  const startedAt = Date.now()

  try {
    // A malformed body is a client error; report it as 400 rather than letting it fall
    // through to the generic 500 handler.
    let body: unknown
    try {
      body = await request.json()
    } catch {
      return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
    }

    const fields = (typeof body === 'object' && body !== null ? body : {}) as Record<
      string,
      unknown
    >
    // Split the credentials off so only the dataset inputs are forwarded upstream.
    const { datasetId, apiToken, ...params } = fields

    if (typeof datasetId !== 'string' || !datasetId || typeof apiToken !== 'string' || !apiToken) {
      return NextResponse.json({ error: 'Missing required parameters' }, { status: 400 })
    }

    logger.info(`[${requestId}] Triggering dataset`, { datasetId })

    const authHeaders = {
      Authorization: `Bearer ${apiToken}`,
      'Content-Type': 'application/json',
    }

    const triggerResponse = await fetch(
      `https://api.brightdata.com/datasets/v3/trigger?dataset_id=${encodeURIComponent(
        datasetId
      )}&include_errors=true`,
      {
        method: 'POST',
        headers: authHeaders,
        body: JSON.stringify([params]),
      }
    )

    const triggerPayload = parseDatasetResponse(await triggerResponse.text())

    if (!triggerResponse.ok) {
      const errorMessage = describeDatasetError(triggerPayload, triggerResponse.statusText)

      logger.error(`[${requestId}] Dataset trigger failed`, {
        datasetId,
        status: triggerResponse.status,
        error: errorMessage,
      })

      return NextResponse.json(
        { error: errorMessage || 'Dataset trigger failed' },
        { status: triggerResponse.status }
      )
    }

    const rawSnapshotId = readDatasetField(triggerPayload, 'snapshot_id')
    const snapshotId =
      rawSnapshotId === undefined || rawSnapshotId === null ? '' : String(rawSnapshotId)

    if (!snapshotId) {
      logger.error(`[${requestId}] Dataset trigger missing snapshot ID`, { datasetId })
      return NextResponse.json({ error: 'No snapshot ID returned from request' }, { status: 500 })
    }

    logger.info(`[${requestId}] Dataset triggered`, { datasetId, snapshotId })

    const pollIntervalMs = 1000
    const maxAttempts = 600
    // Each attempt costs the poll interval plus the request latency, so an attempt cap alone
    // can outlast the route's 600-second maxDuration and the 504 below would never be sent.
    // Bound the loop by wall-clock time as well, leaving headroom to respond.
    const deadline = startedAt + 570 * 1000
    const pendingStatuses = ['running', 'building', 'starting']
    const pause = () => new Promise<void>((resolve) => setTimeout(resolve, pollIntervalMs))
    const snapshotUrl = `https://api.brightdata.com/datasets/v3/snapshot/${encodeURIComponent(
      snapshotId
    )}?format=json`

    for (let attempt = 0; attempt < maxAttempts && Date.now() < deadline; attempt += 1) {
      const snapshotResponse = await fetch(snapshotUrl, {
        method: 'GET',
        headers: authHeaders,
      })

      const snapshotPayload = parseDatasetResponse(await snapshotResponse.text())

      if (!snapshotResponse.ok) {
        // Only a 400 is treated as terminal; any other failure status is retried.
        if (snapshotResponse.status === 400) {
          const errorMessage = describeDatasetError(snapshotPayload, snapshotResponse.statusText)

          logger.error(`[${requestId}] Dataset snapshot fetch failed`, {
            datasetId,
            snapshotId,
            status: snapshotResponse.status,
            error: errorMessage,
          })

          return NextResponse.json(
            { error: errorMessage || 'Dataset snapshot fetch failed' },
            { status: snapshotResponse.status }
          )
        }

        await pause()
        continue
      }

      const rawStatus = readDatasetField(snapshotPayload, 'status')
      const status = rawStatus === undefined || rawStatus === null ? '' : String(rawStatus)

      if (pendingStatuses.includes(status)) {
        await pause()
        continue
      }

      // NOTE(review): any other status value (or none) is returned as data — confirm whether
      // the upstream can report a terminal failure status that should map to an error here.
      const rawSnapshotAt = readDatasetField(snapshotPayload, 'snapshot_at')
      const snapshotAt =
        rawSnapshotAt === undefined || rawSnapshotAt === null ? undefined : String(rawSnapshotAt)

      logger.info(`[${requestId}] Dataset snapshot received`, { datasetId, snapshotId })

      return NextResponse.json({
        data: snapshotPayload,
        snapshot_at: snapshotAt || undefined,
      })
    }

    logger.error(`[${requestId}] Dataset snapshot timed out`, { datasetId, snapshotId })
    return NextResponse.json({ error: 'Timeout waiting for dataset snapshot' }, { status: 504 })
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Dataset fetch failed'
    logger.error(`[${requestId}] Dataset fetch failed`, { error: message })
    return NextResponse.json({ error: message }, { status: 500 })
  }
}
87 changes: 87 additions & 0 deletions apps/sim/app/api/tools/brightdata/scrape-markdown/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import { randomUUID } from 'crypto'
import { createLogger } from '@sim/logger'
import { NextResponse } from 'next/server'

// Logger for the Bright Data scrape-as-markdown route; the handler prefixes every entry with a short request ID.
const logger = createLogger('BrightDataScrapeMarkdownAPI')

/**
 * Builds a readable error message from an upstream payload's `error` field.
 * Structured (object) errors are serialized as JSON instead of collapsing to
 * `[object Object]`; a missing or null `error` yields `fallback`.
 */
function describeScrapeError(payload: unknown, fallback: string): string {
  if (typeof payload !== 'object' || payload === null || !('error' in payload)) {
    return fallback
  }
  const detail = (payload as { error?: unknown }).error
  if (detail === undefined || detail === null) return fallback
  if (typeof detail === 'string') return detail
  if (typeof detail === 'object') return JSON.stringify(detail)
  return String(detail)
}

/**
 * Scrapes a URL through the Bright Data request endpoint and returns it as markdown.
 *
 * Expects a JSON object body with string `url` and `apiToken`, plus an optional string
 * `unlockerZone` (defaults to `mcp_unlocker` when absent or empty).
 *
 * Responses:
 * - 200 `{ markdown, url, title }` on success.
 * - 400 when the body is not valid JSON or a required field is missing.
 * - The upstream status code when the upstream request fails.
 * - 500 when an unexpected error is thrown.
 */
export async function POST(request: Request) {
  const requestId = randomUUID().slice(0, 8)

  try {
    // A malformed body is a client error; report it as 400 rather than letting it fall
    // through to the generic 500 handler.
    let body: unknown
    try {
      body = await request.json()
    } catch {
      return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
    }

    const fields = (typeof body === 'object' && body !== null ? body : {}) as Record<
      string,
      unknown
    >
    const url = typeof fields.url === 'string' ? fields.url : undefined
    const apiToken = typeof fields.apiToken === 'string' ? fields.apiToken : undefined
    const unlockerZone = typeof fields.unlockerZone === 'string' ? fields.unlockerZone : undefined

    if (!url || !apiToken) {
      return NextResponse.json({ error: 'Missing required parameters' }, { status: 400 })
    }

    logger.info(`[${requestId}] Scraping URL as markdown`, { url })

    const response = await fetch('https://api.brightdata.com/request', {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${apiToken}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        zone: unlockerZone || 'mcp_unlocker',
        url,
        format: 'raw',
        data_format: 'markdown',
      }),
    })

    // The upstream body may be plain text or JSON; keep the raw text when it does not parse.
    const responseText = await response.text()
    let payload: unknown = responseText
    try {
      payload = JSON.parse(responseText)
    } catch {
      payload = responseText
    }

    if (!response.ok) {
      const errorMessage = describeScrapeError(payload, response.statusText)

      logger.error(`[${requestId}] Scraping failed`, {
        url,
        status: response.status,
        error: errorMessage,
      })

      return NextResponse.json(
        { error: errorMessage || 'Scraping failed' },
        { status: response.status }
      )
    }

    const objectPayload =
      typeof payload === 'object' && payload !== null
        ? (payload as { markdown?: unknown; title?: unknown })
        : undefined

    // Prefer an explicit `markdown` field, then a plain-text body, then the serialized payload.
    let markdown: string
    if (objectPayload && 'markdown' in objectPayload) {
      markdown = String(objectPayload.markdown ?? '')
    } else if (typeof payload === 'string') {
      markdown = payload
    } else {
      markdown = JSON.stringify(payload)
    }

    const title =
      objectPayload && 'title' in objectPayload ? String(objectPayload.title ?? '') : undefined

    logger.info(`[${requestId}] Scraping completed`, { url })

    return NextResponse.json({
      markdown,
      url,
      title: title || undefined,
    })
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Scraping failed'
    logger.error(`[${requestId}] Scraping failed`, { error: message })
    return NextResponse.json({ error: message }, { status: 500 })
  }
}
105 changes: 105 additions & 0 deletions apps/sim/app/api/tools/brightdata/search-engine/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import { randomUUID } from 'crypto'
import { createLogger } from '@sim/logger'
import { NextResponse } from 'next/server'

// Logger for the Bright Data search-engine route; the handler prefixes every entry with a short request ID.
const logger = createLogger('BrightDataSearchEngineAPI')

/** A single normalized search hit returned to the caller. */
type BrightDataSearchResult = { title: string; url: string; snippet: string }

/**
 * Builds a readable error message from an upstream payload's `error` field.
 * Structured (object) errors are serialized as JSON instead of collapsing to
 * `[object Object]`; a missing or null `error` yields `fallback`.
 */
function describeSearchError(payload: unknown, fallback: string): string {
  if (typeof payload !== 'object' || payload === null || !('error' in payload)) {
    return fallback
  }
  const detail = (payload as { error?: unknown }).error
  if (detail === undefined || detail === null) return fallback
  if (typeof detail === 'string') return detail
  if (typeof detail === 'object') return JSON.stringify(detail)
  return String(detail)
}

/**
 * Converts the payload's `organic` array into normalized results, dropping entries that are
 * not objects or that lack a non-empty string `title` or `link`.
 */
function normalizeOrganicResults(payload: unknown): BrightDataSearchResult[] {
  if (typeof payload !== 'object' || payload === null) return []

  const organic = (payload as { organic?: unknown }).organic
  if (!Array.isArray(organic)) return []

  return organic.flatMap((entry: unknown): BrightDataSearchResult[] => {
    if (!entry || typeof entry !== 'object') return []
    const { title, link, description } = entry as {
      title?: unknown
      link?: unknown
      description?: unknown
    }
    if (typeof title !== 'string' || !title || typeof link !== 'string' || !link) return []
    return [{ title, url: link, snippet: typeof description === 'string' ? description : '' }]
  })
}

/**
 * Runs a Google search through the Bright Data request endpoint and returns normalized results.
 *
 * Expects a JSON object body with string `query` and `apiToken`, an optional string
 * `unlockerZone` (defaults to `mcp_unlocker` when absent or empty), and an optional
 * `maxResults` given as a number or numeric string.
 *
 * `maxResults` limits the result count only when it is a finite value of at least 1 (fractions
 * are rounded down); zero, negative, and non-numeric values mean "no limit".
 *
 * Responses:
 * - 200 `{ results }` on success.
 * - 400 when the body is not valid JSON or a required field is missing.
 * - The upstream status code when the upstream request fails.
 * - 500 when an unexpected error is thrown.
 */
export async function POST(request: Request) {
  const requestId = randomUUID().slice(0, 8)

  try {
    // A malformed body is a client error; report it as 400 rather than letting it fall
    // through to the generic 500 handler.
    let body: unknown
    try {
      body = await request.json()
    } catch {
      return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 })
    }

    const fields = (typeof body === 'object' && body !== null ? body : {}) as Record<
      string,
      unknown
    >
    const query = typeof fields.query === 'string' ? fields.query : undefined
    const apiToken = typeof fields.apiToken === 'string' ? fields.apiToken : undefined
    const unlockerZone = typeof fields.unlockerZone === 'string' ? fields.unlockerZone : undefined
    const maxResults =
      typeof fields.maxResults === 'number'
        ? fields.maxResults
        : typeof fields.maxResults === 'string'
          ? Number(fields.maxResults)
          : undefined

    if (!query || !apiToken) {
      return NextResponse.json({ error: 'Missing required parameters' }, { status: 400 })
    }

    logger.info(`[${requestId}] Searching`, { query, maxResults })

    const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(query)}&start=0&brd_json=1`

    const response = await fetch('https://api.brightdata.com/request', {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${apiToken}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        zone: unlockerZone || 'mcp_unlocker',
        url: searchUrl,
        format: 'raw',
        data_format: 'parsed_light',
      }),
    })

    // The upstream body may be plain text or JSON; keep the raw text when it does not parse.
    const responseText = await response.text()
    let payload: unknown = responseText
    try {
      payload = JSON.parse(responseText)
    } catch {
      payload = responseText
    }

    if (!response.ok) {
      const errorMessage = describeSearchError(payload, response.statusText)

      logger.error(`[${requestId}] Search failed`, {
        query,
        status: response.status,
        error: errorMessage,
      })

      return NextResponse.json(
        { error: errorMessage || 'Search failed' },
        { status: response.status }
      )
    }

    const normalizedResults = normalizeOrganicResults(payload)

    // Guard the limit: a negative value passed to `slice` as the end index would drop results
    // from the tail instead of capping the count, and a fraction below 1 would return nothing.
    const limit =
      maxResults !== undefined && Number.isFinite(maxResults) && maxResults >= 1
        ? Math.floor(maxResults)
        : undefined
    const results = limit === undefined ? normalizedResults : normalizedResults.slice(0, limit)

    logger.info(`[${requestId}] Search completed`, { resultCount: results.length })

    return NextResponse.json({
      results,
    })
  } catch (error) {
    const message = error instanceof Error ? error.message : 'Search failed'
    logger.error(`[${requestId}] Search failed`, { error: message })
    return NextResponse.json({ error: message }, { status: 500 })
  }
}
Loading