---
# OpenAPI document header: spec version, API metadata, base URLs and tags.
openapi: 3.0.3
info:
  title: GYD.AI API
  version: '1.0.0'
  description: |
    Public API contract for Fetch, Map, Crawl, and Search (v1).
    Authentication for external API calls uses `x-api-key` header.
# Base URLs the paths below are resolved against.
servers:
  - description: Production
    url: 'https://gyd.ai/api'
  - description: Local development
    url: 'http://localhost/api'
# Tags used to group operations.
tags:
  - name: Fetch
    description: Fetch page content and normalized markdown
  - name: Map
    description: Extract links from a page
  - name: Crawl
    description: Multi-page crawl with downloadable artifacts
  - name: Search
    description: >-
      SERP search with AI extraction across Google, Bing, and DuckDuckGo
paths:
  # POST /v1/fetch: queue fetch jobs. Body is FetchCreateRequest.
  /v1/fetch:
    post:
      operationId: createFetchJobs
      summary: Create fetch jobs
      description: >-
        Queue one or more fetch jobs. Use `urls` for new integrations;
        `url` is legacy-compatible.
      tags:
        - Fetch
      security:
        - ApiKeyAuth: []
      parameters:
        - name: Idempotency-Key
          in: header
          description: Optional idempotency key for safe retries.
          required: false
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/FetchCreateRequest"
            examples:
              single:
                summary: Single URL, datacenter proxy
                value:
                  urls:
                    - 'https://example.com'
                  proxy:
                    type: datacenter
              multi:
                summary: Multiple URLs, residential proxy
                value:
                  urls:
                    - 'https://example.com'
                    - 'https://example.org'
                  proxy:
                    type: residential
              fast_mode:
                summary: Fast mode (skip auto-scroll)
                value:
                  urls:
                    - 'https://example.com/article'
                  fast_mode: true
              with_webhook:
                summary: Webhook delivery on completion
                value:
                  urls:
                    - 'https://example.com/products'
                  proxy:
                    type: residential
                  webhook_url: 'https://your-server.com/webhooks/gyd'
                  webhook_secret: 'your-hmac-secret'
      responses:
        '200':
          description: Jobs queued
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/FetchCreateResponse"
        '400':
          description: Validation error
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        '401':
          description: Missing API key/session
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        '402':
          description: Insufficient credits
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        '403':
          description: Invalid API key
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        # 429 carries its own inline shape rather than ErrorResponse.
        '429':
          description: Concurrency limit reached
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    type: string
                  current:
                    type: integer
                  limit:
                    type: integer
                  remaining:
                    type: integer
        '503':
          description: Queue unavailable / no jobs queued
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"

  # GET /v1/fetch/list: accepts optional `limit` and `status` query parameters.
  /v1/fetch/list:
    get:
      operationId: listFetchJobs
      summary: List fetch jobs
      tags:
        - Fetch
      security:
        - ApiKeyAuth: []
      parameters:
        - name: limit
          in: query
          required: false
          schema:
            type: integer
            maximum: 200
            default: 50
        - name: status
          in: query
          required: false
          schema:
            type: string
            enum:
              - queued
              - running
              - completed
              - failed
              - aborted
      responses:
        '200':
          description: List of fetch jobs
          content:
            application/json:
              schema:
                type: object
                properties:
                  jobs:
                    type: array
                    items:
                      type: object
                      properties:
                        id:
                          type: integer
                        name:
                          type: string
                        status:
                          type: string
                        job_type:
                          type: string
                          example: fetch
                        url:
                          nullable: true
                          type: string
                        error:
                          nullable: true
                          type: string
                        # Note: camelCase here, snake_case for the other timestamps.
                        createdAt:
                          type: string
                          format: date-time
                        started_at:
                          nullable: true
                          type: string
                          format: date-time
                        finished_at:
                          nullable: true
                          type: string
                          format: date-time
                        duration_ms:
                          nullable: true
                          type: integer

  # POST /v1/fetch/{id}/cancel: cancel a fetch job by integer id.
  /v1/fetch/{id}/cancel:
    post:
      operationId: cancelFetchJob
      summary: Cancel a fetch job
      description: |
        Aborts a queued or running fetch job. If the worker has not yet started processing,
        it will skip the job. If already running, the worker will stop at the next checkpoint.
        The concurrency slot is released immediately. Refunds are not issued for cancellations.
      tags:
        - Fetch
      security:
        - ApiKeyAuth: []
      parameters:
        - name: id
          in: path
          required: true
          schema:
            type: integer
      responses:
        '200':
          description: Job cancelled
          content:
            application/json:
              schema:
                type: object
                properties:
                  id:
                    type: integer
                  status:
                    type: string
                    enum:
                      - aborted
                  cancelled:
                    type: boolean
        '400':
          description: Job is already in a terminal state
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        '404':
          description: Job not found
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"

  # GET /v1/fetch/{id}: fetch job details, returned as FetchStatusResponse.
  /v1/fetch/{id}:
    get:
      operationId: getFetchJobStatus
      summary: Get fetch job status
      tags:
        - Fetch
      security:
        - ApiKeyAuth: []
      parameters:
        - name: id
          in: path
          required: true
          schema:
            type: integer
      responses:
        '200':
          description: Fetch job details
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/FetchStatusResponse"
        '404':
          description: Job not found
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"

  # POST /v1/map: queue map jobs. Body is MapCreateRequest.
  /v1/map:
    post:
      operationId: createMapJobs
      summary: Create map jobs
      description: >-
        Queue one or more map jobs. Use `urls` for new integrations;
        `url` is legacy-compatible.
      tags:
        - Map
      security:
        - ApiKeyAuth: []
      parameters:
        - name: Idempotency-Key
          in: header
          description: Optional idempotency key for safe retries.
          required: false
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/MapCreateRequest"
            examples:
              single:
                value:
                  urls:
                    - 'https://example.com'
                  proxy:
                    type: datacenter
      responses:
        '200':
          description: Jobs queued
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/MapCreateResponse"
        '400':
          description: Validation error
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        '401':
          description: Missing API key/session
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        '402':
          description: Insufficient credits
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        '403':
          description: Invalid API key
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        # 429 carries its own inline shape rather than ErrorResponse.
        '429':
          description: Concurrency limit reached
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    type: string
                  current:
                    type: integer
                  limit:
                    type: integer
                  remaining:
                    type: integer
        '503':
          description: Queue unavailable / no jobs queued
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"

  # GET /v1/map/list: accepts optional `limit` and `status` query parameters.
  /v1/map/list:
    get:
      operationId: listMapJobs
      summary: List map jobs
      tags:
        - Map
      security:
        - ApiKeyAuth: []
      parameters:
        - name: limit
          in: query
          required: false
          schema:
            type: integer
            maximum: 200
            default: 50
        - name: status
          in: query
          required: false
          schema:
            type: string
            enum:
              - queued
              - running
              - completed
              - failed
              - aborted
      responses:
        '200':
          description: List of map jobs
          content:
            application/json:
              schema:
                type: object
                properties:
                  jobs:
                    type: array
                    items:
                      type: object
                      properties:
                        id:
                          type: integer
                        name:
                          type: string
                        status:
                          type: string
                        job_type:
                          type: string
                          example: map
                        url:
                          nullable: true
                          type: string
                        error:
                          nullable: true
                          type: string
                        # Note: camelCase here, snake_case for the other timestamps.
                        createdAt:
                          type: string
                          format: date-time
                        started_at:
                          nullable: true
                          type: string
                          format: date-time
                        finished_at:
                          nullable: true
                          type: string
                          format: date-time
                        duration_ms:
                          nullable: true
                          type: integer

  # POST /v1/map/{id}/cancel: cancel a map job by integer id.
  /v1/map/{id}/cancel:
    post:
      operationId: cancelMapJob
      summary: Cancel a map job
      description: >-
        Aborts a queued or running map job. The concurrency slot is
        released immediately.
      tags:
        - Map
      security:
        - ApiKeyAuth: []
      parameters:
        - name: id
          in: path
          required: true
          schema:
            type: integer
      responses:
        '200':
          description: Job cancelled
          content:
            application/json:
              schema:
                type: object
                properties:
                  id:
                    type: integer
                  status:
                    type: string
                    enum:
                      - aborted
                  cancelled:
                    type: boolean
        '400':
          description: Job is already in a terminal state
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        '404':
          description: Job not found
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"

  # GET /v1/map/{id}: map job details, returned as MapStatusResponse.
  /v1/map/{id}:
    get:
      operationId: getMapJobStatus
      summary: Get map job status
      tags:
        - Map
      security:
        - ApiKeyAuth: []
      parameters:
        - name: id
          in: path
          required: true
          schema:
            type: integer
      responses:
        '200':
          description: Map job details
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/MapStatusResponse"
        '404':
          description: Job not found
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"

  # POST /v1/crawl: queue crawl jobs. Body is CrawlCreateRequest.
  /v1/crawl:
    post:
      operationId: createCrawlJobs
      summary: Create crawl jobs
      description: >-
        Queue one or more crawl jobs. Use `urls` for new integrations;
        `url` is legacy-compatible.
      tags:
        - Crawl
      security:
        - ApiKeyAuth: []
      parameters:
        - name: Idempotency-Key
          in: header
          description: Optional idempotency key for safe retries.
          required: false
          schema:
            type: string
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/CrawlCreateRequest"
            examples:
              default:
                value:
                  urls:
                    - 'https://example.com'
                  max_depth: 1
                  proxy:
                    type: datacenter
      responses:
        '200':
          description: Jobs queued
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/CrawlCreateResponse"
        '400':
          description: Validation error
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        '401':
          description: Missing API key/session
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        '402':
          description: Insufficient credits
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        '403':
          description: Invalid API key
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        # 429 carries its own inline shape rather than ErrorResponse.
        '429':
          description: Concurrency limit reached
          content:
            application/json:
              schema:
                type: object
                properties:
                  error:
                    type: string
                  current:
                    type: integer
                  limit:
                    type: integer
                  remaining:
                    type: integer
        '503':
          description: Queue unavailable / no jobs queued
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"

  # GET /v1/crawl/list: accepts optional `limit` and `status` query parameters.
  /v1/crawl/list:
    get:
      operationId: listCrawlJobs
      summary: List crawl jobs
      tags:
        - Crawl
      security:
        - ApiKeyAuth: []
      parameters:
        - name: limit
          in: query
          required: false
          schema:
            type: integer
            maximum: 200
            default: 50
        - name: status
          in: query
          required: false
          schema:
            type: string
            # Crawl adds `mapping` to the statuses accepted by fetch/map lists.
            enum:
              - queued
              - running
              - mapping
              - completed
              - failed
              - aborted
      responses:
        '200':
          description: List of crawl jobs
          content:
            application/json:
              schema:
                type: object
                properties:
                  jobs:
                    type: array
                    items:
                      type: object
                      properties:
                        id:
                          type: integer
                        name:
                          type: string
                        status:
                          type: string
                        job_type:
                          type: string
                          example: crawl
                        url:
                          nullable: true
                          type: string
                        max_depth:
                          nullable: true
                          type: integer
                        total_tasks:
                          type: integer
                        completed:
                          type: integer
                        failed:
                          type: integer
                        error:
                          nullable: true
                          type: string
                        # Note: camelCase here, snake_case for the other timestamps.
                        createdAt:
                          type: string
                          format: date-time
                        started_at:
                          nullable: true
                          type: string
                          format: date-time
                        finished_at:
                          nullable: true
                          type: string
                          format: date-time
                        duration_ms:
                          nullable: true
                          type: integer

  # POST /v1/crawl/{id}/cancel: cancel a crawl job by integer id.
  /v1/crawl/{id}/cancel:
    post:
      operationId: cancelCrawlJob
      summary: Cancel a crawl job
      description: |
        Aborts a queued or running crawl job. Pending and queued tasks are marked
        cancelled. The concurrency slots are released immediately.
      tags:
        - Crawl
      security:
        - ApiKeyAuth: []
      parameters:
        - name: id
          in: path
          required: true
          schema:
            type: integer
      responses:
        '200':
          description: Job cancelled
          content:
            application/json:
              schema:
                type: object
                properties:
                  id:
                    type: integer
                  status:
                    type: string
                    enum:
                      - aborted
                  cancelled:
                    type: boolean
        '400':
          description: Job is already in a terminal state
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        '404':
          description: Job not found
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"

  # GET /v1/crawl/{id}: crawl job details, returned as CrawlStatusResponse.
  /v1/crawl/{id}:
    get:
      operationId: getCrawlJobStatus
      summary: Get crawl job status
      tags:
        - Crawl
      security:
        - ApiKeyAuth: []
      parameters:
        - name: id
          in: path
          required: true
          schema:
            type: integer
      responses:
        '200':
          description: Crawl job details
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/CrawlStatusResponse"
        '404':
          description: Job not found
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"

  # GET /v1/crawl/{id}/rows/{rowIndex}/file: download URL for one row artifact.
  /v1/crawl/{id}/rows/{rowIndex}/file:
    get:
      operationId: getCrawlRowFile
      summary: Get presigned row file URL
      description: >-
        Returns a temporary download URL for row markdown (`md`) or
        HTML (`html`) artifact.
      tags:
        - Crawl
      security:
        - ApiKeyAuth: []
      parameters:
        - name: id
          in: path
          required: true
          schema:
            type: integer
        - name: rowIndex
          in: path
          required: true
          schema:
            type: integer
            minimum: 0
        # Artifact flavour; `md` is used when the parameter is omitted.
        - name: type
          in: query
          required: false
          schema:
            type: string
            default: md
            enum:
              - md
              - html
      responses:
        '200':
          description: Presigned URL
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/CrawlRowFileResponse"
        '400':
          description: Invalid row index/type
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        '404':
          description: Crawl job not found
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"

  # POST /v1/search: queue a single search job. Body is SearchCreateRequest.
  /v1/search:
    post:
      tags: [Search]
      summary: Create a search job
      description: |
        Queue a SERP search against Google, Bing, or DuckDuckGo. The engine is
        selected automatically with fallback if blocked. Results include ranked
        organic links; optionally each top result's page is fetched and AI-extracted.
      operationId: createSearchJob
      security:
        - ApiKeyAuth: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/SearchCreateRequest'
            examples:
              basic:
                summary: Simple query
                value:
                  query: "best open source LLMs 2025"
              # Renamed from `with_extraction`: the example sets
              # `extractData: false`, so the old key contradicted its content.
              without_extraction:
                summary: Query with AI page extraction disabled
                value:
                  query: "nvidia h100 price"
                  engine: "bing"
                  numResults: 10
                  extractData: false
              with_locale:
                summary: Localized search
                value:
                  query: "real estate market trends"
                  location: "gb"
                  language: "en"
                  numResults: 5
      responses:
        "200":
          description: Job queued
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SearchCreateResponse'
        # 400/401/403 mirror the fetch, map and crawl create operations, which
        # use the same ApiKeyAuth scheme and also take a required JSON body.
        # NOTE(review): confirm the search handler returns these exact codes.
        "400":
          description: Validation error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        "401":
          description: Missing API key/session
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        "402":
          description: Insufficient credits
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        "403":
          description: Invalid API key
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        # Unlike the other create operations, 429 here uses ErrorResponse.
        "429":
          description: Concurrency limit reached
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # GET /v1/search/list: accepts an optional `limit` query parameter.
  /v1/search/list:
    get:
      operationId: listSearchJobs
      summary: List search jobs
      tags:
        - Search
      security:
        - ApiKeyAuth: []
      parameters:
        - name: limit
          in: query
          required: false
          schema:
            type: integer
            maximum: 200
            default: 50
      responses:
        '200':
          description: List of search jobs
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/SearchListResponse"

  # GET /v1/search/{id}: search job details, returned as SearchStatusResponse.
  /v1/search/{id}:
    get:
      operationId: getSearchJob
      summary: Get search job status and results
      tags:
        - Search
      security:
        - ApiKeyAuth: []
      parameters:
        - name: id
          in: path
          required: true
          schema:
            type: integer
      responses:
        '200':
          description: Search job details
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/SearchStatusResponse"
        '404':
          description: Job not found
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"

  # POST /v1/search/{id}/cancel: cancel a search job by integer id.
  /v1/search/{id}/cancel:
    post:
      operationId: cancelSearchJob
      summary: Cancel a queued search job
      tags:
        - Search
      security:
        - ApiKeyAuth: []
      parameters:
        - name: id
          in: path
          required: true
          schema:
            type: integer
      responses:
        '200':
          description: Job cancelled
          content:
            application/json:
              schema:
                type: object
                properties:
                  id:
                    type: integer
                  status:
                    type: string
                    example: aborted
                  cancelled:
                    type: boolean
        '404':
          description: Job not found
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        # Terminal-state conflicts are documented as 409 on this endpoint.
        '409':
          description: Job already in terminal state
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"

components:
  securitySchemes:
    # API key sent in the `x-api-key` request header.
    ApiKeyAuth:
      type: apiKey
      name: x-api-key
      in: header

  schemas:
    # Error envelope; only `error` is required.
    ErrorResponse:
      type: object
      required:
        - error
      properties:
        error:
          type: string
        # Free-form objects: no fixed shape is declared for the items.
        details:
          type: array
          items:
            type: object
            additionalProperties: true
        limits:
          type: object
          additionalProperties: true

    # Proxy selection in create requests; no keys other than `type` allowed.
    ProxyRequest:
      type: object
      required:
        - type
      additionalProperties: false
      properties:
        type:
          type: string
          enum:
            - datacenter
            - residential

    # Proxy settings as reported back in responses; the object may be null.
    ProxyPublicConfig:
      nullable: true
      type: object
      properties:
        type:
          type: string
          enum:
            - datacenter
            - residential
        mode:
          type: string
          enum:
            - auto
            - none

    # One entry of the `rejected` array in a create response.
    RejectedItem:
      type: object
      properties:
        index:
          nullable: true
          type: integer
        url:
          nullable: true
          type: string
        reason:
          type: string
        source:
          type: string
          enum:
            - validation
            - enqueue

    # Shared URL input for the fetch, map and crawl create requests.
    RequestUrls:
      type: object
      description: Provide `urls` for new integrations. `url` is legacy compatible.
      # Without this constraint an empty object validated against the schema,
      # although the description asks for one of the two inputs.
      # NOTE(review): assumes the server rejects a body with neither field
      # (the create operations document a 400 validation error); confirm.
      anyOf:
        - required: [urls]
        - required: [url]
      properties:
        url:
          type: string
          format: uri
          description: Legacy single URL input.
        urls:
          type: array
          minItems: 1
          items:
            type: string
            format: uri

    # Body of POST /v1/fetch: RequestUrls plus fetch-specific options.
    FetchCreateRequest:
      allOf:
        - $ref: '#/components/schemas/RequestUrls'
        - type: object
          properties:
            proxy:
              $ref: '#/components/schemas/ProxyRequest'
            fast_mode:
              type: boolean
              default: false
              description: |
                Skip auto-scroll and extended page settle. Speeds up retrieval on
                pages that do not require JavaScript-driven lazy loading. Default false.
            webhook_url:
              type: string
              format: uri
              description: |
                URL to POST the job result to when the fetch completes.
                The request body will match the `WebhookPayload` schema.
                Delivery is best-effort — failures are not retried.
            webhook_secret:
              type: string
              # `password` is the OpenAPI hint that tells UIs to obscure the
              # value; the wire type is still a plain string.
              format: password
              maxLength: 256
              description: |
                Optional HMAC-SHA256 secret. When provided, each webhook request
                includes an `X-GYD-Signature` header containing
                `sha256=<hex(HMAC-SHA256(secret, raw_body))>`.
                Verify this signature on your server to confirm authenticity.

    # Body of POST /v1/map: RequestUrls plus map-specific options.
    MapCreateRequest:
      allOf:
        - $ref: '#/components/schemas/RequestUrls'
        - type: object
          properties:
            proxy:
              $ref: '#/components/schemas/ProxyRequest'
            fast_mode:
              type: boolean
              default: false
              description: |
                Skip auto-scroll and extended page settle. Speeds up retrieval on
                pages that do not require JavaScript-driven lazy loading. Default false.
            webhook_url:
              type: string
              format: uri
              description: |
                URL to POST the job result to when the map completes.
                The request body will match the `WebhookPayload` schema.
                Delivery is best-effort — failures are not retried.
            webhook_secret:
              type: string
              # `password` is the OpenAPI hint that tells UIs to obscure the
              # value; the wire type is still a plain string.
              format: password
              maxLength: 256
              description: |
                Optional HMAC-SHA256 secret. When provided, each webhook request
                includes an `X-GYD-Signature` header containing
                `sha256=<hex(HMAC-SHA256(secret, raw_body))>`.
                Verify this signature on your server to confirm authenticity.

    # Body of POST /v1/crawl: RequestUrls plus crawl-specific options.
    CrawlCreateRequest:
      allOf:
        - $ref: '#/components/schemas/RequestUrls'
        - type: object
          properties:
            max_depth:
              type: integer
              minimum: 1
              maximum: 5
              default: 1
              description: Depth of link traversal. 1 = root URL only. Max 5.
            proxy:
              $ref: '#/components/schemas/ProxyRequest'
            concurrency:
              type: integer
              minimum: 1
              maximum: 500
              description: |
                Number of parallel row workers for this crawl batch. Defaults to your
                plan's crawl concurrency limit. Cannot exceed your plan maximum.
            fast_mode:
              type: boolean
              default: false
              description: |
                Skip auto-scroll and extended page settle on each crawled page.
                Speeds up large crawls on sites that do not require lazy-loaded content.
            webhook_url:
              type: string
              format: uri
              description: |
                URL to POST the job result to when the crawl completes.
                The request body will match the `WebhookPayload` schema.
                Delivery is best-effort — failures are not retried.
            webhook_secret:
              type: string
              # `password` is the OpenAPI hint that tells UIs to obscure the
              # value; the wire type is still a plain string.
              format: password
              maxLength: 256
              description: |
                Optional HMAC-SHA256 secret. When provided, each webhook request
                includes an `X-GYD-Signature` header containing
                `sha256=<hex(HMAC-SHA256(secret, raw_body))>`.
                Verify this signature on your server to confirm authenticity.

    # Fields shared by the fetch, map and crawl create responses.
    CreateResponseBase:
      type: object
      required:
        - service
        - status
        - count
        - accepted_count
        - rejected_count
        - duplicates_removed
        - rejected
        - job_ids
        - poll_urls
        - inputs
      properties:
        service:
          type: string
          enum:
            - fetch
            - map
            - crawl
        status:
          type: string
          enum:
            - queued
        count:
          type: integer
        accepted_count:
          type: integer
        rejected_count:
          type: integer
        duplicates_removed:
          type: integer
        rejected:
          type: array
          items:
            $ref: "#/components/schemas/RejectedItem"
        job_ids:
          type: array
          items:
            type: integer
        poll_urls:
          type: array
          items:
            type: string
        inputs:
          type: object
          properties:
            urls:
              type: array
              items:
                type: string
                format: uri
            max_depth:
              nullable: true
              type: integer
            proxy:
              $ref: "#/components/schemas/ProxyPublicConfig"
        # The remaining fields are optional (absent from `required`).
        job_id:
          nullable: true
          type: integer
        poll_url:
          nullable: true
          type: string
        idempotency_replay:
          type: boolean

    # CreateResponseBase with `service` restricted to the single value `fetch`.
    # Both allOf members must hold, so every base constraint still applies.
    FetchCreateResponse:
      allOf:
        - $ref: '#/components/schemas/CreateResponseBase'
        - type: object
          properties:
            service:
              type: string
              enum:
                - fetch

    # CreateResponseBase with `service` restricted to the single value `map`.
    # Both allOf members must hold, so every base constraint still applies.
    MapCreateResponse:
      allOf:
        - $ref: '#/components/schemas/CreateResponseBase'
        - type: object
          properties:
            service:
              type: string
              enum:
                - map

    # CreateResponseBase with `service` restricted to the single value `crawl`.
    # Both allOf members must hold, so every base constraint still applies.
    CrawlCreateResponse:
      allOf:
        - $ref: '#/components/schemas/CreateResponseBase'
        - type: object
          properties:
            service:
              type: string
              enum:
                - crawl

    # Element type of `RuntimeInfo.proxy_usage`. No property is required; all
    # fields except the two booleans (`proxy_used`, `success`) are nullable.
    ProxyUsageItem:
      type: object
      properties:
        stage:
          type: string
          nullable: true
        proxy_type:
          type: string
          nullable: true
        proxy_mode:
          type: string
          nullable: true
        proxy_used:
          type: boolean
        proxy_ip:
          type: string
          nullable: true
        proxy_country:
          type: string
          nullable: true
        proxy_region:
          type: string
          nullable: true
        proxy_city:
          type: string
          nullable: true
        # NOTE(review): integer, presumably the HTTP status code observed for
        # this attempt — confirm; the schema itself does not say.
        status:
          type: integer
          nullable: true
        success:
          type: boolean
        # NOTE(review): declared as a plain string (no `format: date-time`),
        # unlike the timestamp fields of the status responses — confirm format.
        timestamp:
          type: string
          nullable: true

    # Execution details object. Referenced as the `runtime` property of
    # FetchStatusResponse, MapStatusResponse, CrawlStatusResponse and
    # WebhookPayload. No property is required.
    RuntimeInfo:
      type: object
      properties:
        fetch_strategy:
          type: string
          nullable: true
        effective_proxy_type:
          type: string
          nullable: true
        decoded_charset:
          type: string
          nullable: true
        # List of plain strings; the set of possible values is not constrained.
        attempts:
          type: array
          items:
            type: string
        proxy_usage:
          type: array
          items:
            $ref: '#/components/schemas/ProxyUsageItem'
        # Free-form object (any additional properties allowed); may be null.
        proxy_usage_summary:
          type: object
          nullable: true
          additionalProperties: true

    # Status/result document for a fetch job. No property is required.
    # Several values are declared twice: once at the top level and once inside
    # a grouping object (`outputs`, `timestamps`).
    # NOTE(review): the flat copies look like a backward-compatible mirror of
    # the grouped ones — confirm before removing either form.
    FetchStatusResponse:
      type: object
      properties:
        id:
          type: integer
        service:
          type: string
          enum:
            - fetch
        # NOTE(review): unconstrained string here, whereas MapStatusResponse
        # and SearchStatusResponse enumerate
        # queued/running/completed/failed/aborted — confirm whether the same
        # value set applies and should be declared.
        status:
          type: string
        error:
          type: string
          nullable: true
        url:
          type: string
          nullable: true
        final_url:
          type: string
          nullable: true
        http_status:
          type: integer
          nullable: true
        content_size:
          type: integer
          nullable: true
        raw_html_url:
          type: string
          nullable: true
        content_md_url:
          type: string
          nullable: true
        createdAt:
          type: string
          format: date-time
        startedAt:
          type: string
          format: date-time
          nullable: true
        finishedAt:
          type: string
          format: date-time
          nullable: true
        durationMs:
          type: integer
          nullable: true
        inputs:
          type: object
          properties:
            url:
              type: string
              nullable: true
            proxy:
              $ref: '#/components/schemas/ProxyPublicConfig'
        # Grouped form of the top-level output fields declared above.
        outputs:
          type: object
          properties:
            raw_html_url:
              type: string
              nullable: true
            content_md_url:
              type: string
              nullable: true
            final_url:
              type: string
              nullable: true
            http_status:
              type: integer
              nullable: true
            content_size:
              type: integer
              nullable: true
        counts:
          type: object
          properties:
            total:
              type: integer
            completed:
              type: integer
            failed:
              type: integer
        # Grouped form of createdAt/startedAt/finishedAt/durationMs; as at the
        # top level, only createdAt is non-nullable.
        timestamps:
          type: object
          properties:
            createdAt:
              type: string
              format: date-time
            startedAt:
              type: string
              format: date-time
              nullable: true
            finishedAt:
              type: string
              format: date-time
              nullable: true
            durationMs:
              type: integer
              nullable: true
        runtime:
          $ref: '#/components/schemas/RuntimeInfo'

    # Per-origin link counters. Referenced by `MapStatusResponse.sources` and
    # `MapStatusResponse.outputs.sources`. The object as a whole is nullable;
    # the three counters are mutually exclusive buckets per their descriptions
    # ("only in sitemaps", "only ... page", "both").
    MapLinkSources:
      type: object
      nullable: true
      description: Breakdown of where discovered links came from.
      properties:
        sitemap:
          type: integer
          description: Links found only in sitemaps.
        page:
          type: integer
          description: Links found only by parsing the rendered HTML page.
        both:
          type: integer
          description: Links found in both sitemap and page HTML.

    # Status/result document for a map (link extraction) job. No property is
    # required. Output and timing values are declared both at the top level and
    # inside the grouping objects `outputs` / `timestamps`, and the link
    # counters additionally appear flattened under `counts`.
    # NOTE(review): the duplicated declarations look like a compatibility
    # mirror — confirm before removing either form.
    MapStatusResponse:
      type: object
      properties:
        id:
          type: integer
        service:
          type: string
          enum:
            - map
        status:
          type: string
          enum:
            - queued
            - running
            - completed
            - failed
            - aborted
        error:
          type: string
          nullable: true
        source_url:
          type: string
          nullable: true
        final_url:
          type: string
          nullable: true
        total_links:
          type: integer
        sources:
          $ref: '#/components/schemas/MapLinkSources'
        raw_html_url:
          type: string
          nullable: true
          description: Presigned URL valid for 24 hours.
        links_json_url:
          type: string
          nullable: true
          description: |
            Presigned URL to a JSON file containing the full link list.
            Valid for 24 hours. Structure: `{ source_url, final_url, total_links, sources, links: [{url, source}] }`.
        links_md_url:
          type: string
          nullable: true
          description: Presigned URL to a Markdown file listing all discovered links. Valid for 24 hours.
        createdAt:
          type: string
          format: date-time
        startedAt:
          type: string
          format: date-time
          nullable: true
        finishedAt:
          type: string
          format: date-time
          nullable: true
        durationMs:
          type: integer
          nullable: true
        inputs:
          type: object
          properties:
            url:
              type: string
              nullable: true
            proxy:
              $ref: '#/components/schemas/ProxyPublicConfig'
        # Grouped form of the artifact URLs and link totals declared above.
        outputs:
          type: object
          properties:
            raw_html_url:
              type: string
              nullable: true
            links_json_url:
              type: string
              nullable: true
            links_md_url:
              type: string
              nullable: true
            total_links:
              type: integer
            sources:
              $ref: '#/components/schemas/MapLinkSources'
        # Flattened counters: the same three buckets as MapLinkSources, but
        # here each counter is individually nullable.
        counts:
          type: object
          properties:
            total_links:
              type: integer
            sitemap:
              type: integer
              nullable: true
            page:
              type: integer
              nullable: true
            both:
              type: integer
              nullable: true
        timestamps:
          type: object
          properties:
            createdAt:
              type: string
              format: date-time
            startedAt:
              type: string
              format: date-time
              nullable: true
            finishedAt:
              type: string
              format: date-time
              nullable: true
            durationMs:
              type: integer
              nullable: true
        runtime:
          $ref: '#/components/schemas/RuntimeInfo'

    # Element type of `CrawlStatusResponse.rows`. No property is required.
    # Field names here are snake_case (`duration_ms`), unlike the camelCase
    # `durationMs` used by the status responses.
    CrawlRow:
      type: object
      properties:
        row_index:
          type: integer
        url:
          type: string
        depth:
          type: integer
        # Unconstrained string; no enum of row statuses is declared.
        status:
          type: string
        duration_ms:
          type: integer
          nullable: true
        content_size:
          type: integer
          nullable: true
        # NOTE(review): presumably storage keys for the row's Markdown / HTML
        # artifacts (compare `CrawlRowFileResponse.key`) — confirm. Neither is
        # declared nullable.
        md_key:
          type: string
        html_key:
          type: string

    # Status/result document for a crawl job. No property is required.
    # The result URLs and timing fields are declared both at the top level and
    # inside the grouping objects `outputs` / `timestamps`.
    # NOTE(review): the duplicated declarations look like a compatibility
    # mirror — confirm before removing either form.
    CrawlStatusResponse:
      type: object
      properties:
        id:
          type: integer
        service:
          type: string
          enum:
            - crawl
        # NOTE(review): unconstrained string here, whereas MapStatusResponse
        # and SearchStatusResponse enumerate their status values — confirm the
        # value set for crawls before declaring an enum.
        status:
          type: string
        error:
          type: string
          nullable: true
        total_tasks:
          type: integer
        processed_tasks:
          type: integer
        results_url:
          type: string
          nullable: true
        results_jsonl_url:
          type: string
          nullable: true
        results_csv_url:
          type: string
          nullable: true
        results_zip_url:
          type: string
          nullable: true
        createdAt:
          type: string
          format: date-time
        startedAt:
          type: string
          format: date-time
          nullable: true
        finishedAt:
          type: string
          format: date-time
          nullable: true
        durationMs:
          type: integer
          nullable: true
        inputs:
          type: object
          properties:
            url:
              type: string
              nullable: true
            max_depth:
              type: integer
              nullable: true
            proxy:
              $ref: '#/components/schemas/ProxyPublicConfig'
        # Grouped form of the four results_* URLs declared above.
        outputs:
          type: object
          properties:
            results_url:
              type: string
              nullable: true
            results_jsonl_url:
              type: string
              nullable: true
            results_csv_url:
              type: string
              nullable: true
            results_zip_url:
              type: string
              nullable: true
        counts:
          type: object
          properties:
            total_tasks:
              type: integer
            completed:
              type: integer
            failed:
              type: integer
            processed:
              type: integer
            # Open-ended map from an arbitrary string key to an integer count.
            status_counts:
              type: object
              additionalProperties:
                type: integer
        timestamps:
          type: object
          properties:
            createdAt:
              type: string
              format: date-time
            startedAt:
              type: string
              format: date-time
              nullable: true
            finishedAt:
              type: string
              format: date-time
              nullable: true
            durationMs:
              type: integer
              nullable: true
        runtime:
          $ref: '#/components/schemas/RuntimeInfo'
        rows:
          type: array
          items:
            $ref: '#/components/schemas/CrawlRow'

    # Body of the outbound webhook POST. No property is required.
    # NOTE(review): the description and the `event` enum cover fetch jobs only,
    # yet CrawlCreateRequest.webhook_url states its deliveries match this
    # schema and SearchCreateRequest also accepts webhook fields. Confirm the
    # payloads/event names used for crawl and search before extending this.
    WebhookPayload:
      type: object
      description: |
        Payload POSTed to `webhook_url` when a fetch job completes (success or failure).
        When `webhook_secret` was set, verify the `X-GYD-Signature` request header:
        `sha256=<hex(HMAC-SHA256(secret, raw_body))>`.
      properties:
        event:
          type: string
          enum:
            - fetch.completed
            - fetch.failed
        id:
          type: integer
          description: JobBatch ID
        # Only the two terminal states are allowed here.
        status:
          type: string
          enum:
            - completed
            - failed
        url:
          type: string
          nullable: true
        final_url:
          type: string
          nullable: true
        http_status:
          type: integer
          nullable: true
        content_size:
          type: integer
          nullable: true
        raw_html_url:
          type: string
          nullable: true
          description: Presigned URL valid for 24 hours.
        content_md_url:
          type: string
          nullable: true
          description: Presigned URL valid for 24 hours.
        error:
          type: string
          nullable: true
        # Not declared on FetchStatusResponse; specific to this payload.
        warning:
          type: string
          nullable: true
        durationMs:
          type: integer
          nullable: true
        runtime:
          $ref: '#/components/schemas/RuntimeInfo'

    # Descriptor for a single crawl-row file. No property is required.
    # NOTE(review): `url` is presumably a presigned download link and
    # `expires_in` its lifetime (unit not stated, likely seconds) — confirm.
    CrawlRowFileResponse:
      type: object
      properties:
        url:
          type: string
        key:
          type: string
        # Which of the two row artifacts is referenced.
        type:
          type: string
          enum:
            - md
            - html
        expires_in:
          type: integer

    # Request body for creating a search job. Only `query` is mandatory; the
    # other properties are optional and most declare a default.
    # Naming note: `numResults` / `extractData` are camelCase while the webhook
    # fields are snake_case; both spellings are part of the wire contract.
    SearchCreateRequest:
      type: object
      required:
        - query
      properties:
        query:
          type: string
          minLength: 1
          description: The search query string.
          example: "best LLM frameworks 2025"
        engine:
          type: string
          enum:
            - google
            - bing
            - duckduckgo
          default: google
          description: Preferred search engine. Falls back automatically if blocked.
        # The two-letter shape is stated in the description only; no
        # length/pattern constraint is declared on `location` or `language`.
        location:
          type: string
          default: us
          description: Two-letter country code for geo-targeted results (e.g. `us`, `gb`, `de`).
        language:
          type: string
          default: en
          description: Two-letter language code (e.g. `en`, `de`, `fr`).
        numResults:
          type: integer
          minimum: 1
          maximum: 20
          default: 5
          description: Number of top result pages to extract (when `extractData` is true).
        extractData:
          type: boolean
          default: true
          description: If true, the top N result pages are fetched and AI-extracted into structured JSON.
        webhook_url:
          type: string
          format: uri
          description: Optional URL to receive a POST when the job completes or fails.
        # NOTE(review): CrawlCreateRequest caps its webhook_secret with
        # `maxLength: 256`; no limit is declared here — confirm whether the
        # same cap applies to search.
        webhook_secret:
          type: string
          description: Optional HMAC secret for webhook signature verification.

    # Response body describing a newly created search job. Unlike
    # CreateResponseBase this is a single-job shape (`job_id` / `poll_url`, no
    # plural arrays) and declares no required properties.
    SearchCreateResponse:
      type: object
      properties:
        job_id:
          type: integer
        service:
          type: string
          # Pinned to one value so the discriminator is machine-checkable,
          # consistent with how the fetch/map/crawl responses constrain
          # `service` with an enum.
          enum:
            - search
          example: search
        # NOTE(review): CreateResponseBase restricts `status` to `queued`;
        # left unconstrained here because only the example documents it.
        status:
          type: string
          example: queued
        # The example is a server-relative path, so this is not `format: uri`.
        poll_url:
          type: string
          example: /api/v1/search/42
        # Subset of the request parameters: numResults, extractData and the
        # webhook fields are not declared in this object.
        inputs:
          type: object
          properties:
            query:
              type: string
            engine:
              type: string
            location:
              type: string
            language:
              type: string

    # Status/result document for a search job. No property is required.
    # `engine` reports the engine that served the results, `requested_engine`
    # the one asked for; `fallback_applied` / `fallback_chain` describe any
    # switch between them.
    SearchStatusResponse:
      type: object
      properties:
        id:
          type: integer
        service:
          type: string
          # Pinned to one value so the discriminator is machine-checkable,
          # consistent with the `service` enums on the fetch/map/crawl
          # status responses.
          enum:
            - search
          example: search
        status:
          type: string
          enum: [queued, running, completed, failed, aborted]
        query:
          type: string
        engine:
          type: string
          description: Engine that actually served results (may differ if fallback triggered).
        requested_engine:
          type: string
        fallback_applied:
          type: boolean
        # One entry per engine switch: source engine, target engine, reason.
        fallback_chain:
          type: array
          items:
            type: object
            properties:
              from:
                type: string
              to:
                type: string
              reason:
                type: string
        blocked:
          type: boolean
        error:
          type: string
          nullable: true
        total_results:
          type: integer
        num_extracted:
          type: integer
        results:
          type: array
          description: Organic SERP results.
          items:
            $ref: '#/components/schemas/SerpResult'
        # NOTE(review): raw_html_url documents a 1h TTL while the three
        # result_* URLs document 24h — confirm the difference is intentional.
        outputs:
          type: object
          properties:
            raw_html_url:
              type: string
              nullable: true
              description: Presigned URL for the raw SERP HTML (1h TTL).
            result_csv_url:
              type: string
              nullable: true
              description: Presigned URL for results CSV (24h TTL).
            result_json_url:
              type: string
              nullable: true
              description: Presigned URL for results JSON (24h TTL).
            result_md_url:
              type: string
              nullable: true
              description: Presigned URL for results Markdown (24h TTL).
        inputs:
          type: object
          properties:
            query:
              type: string
            engine:
              type: string
            location:
              type: string
            language:
              type: string
        # Timing is only exposed in grouped form here; this schema declares no
        # top-level createdAt/startedAt/finishedAt/durationMs.
        timestamps:
          type: object
          properties:
            createdAt:
              type: string
              format: date-time
            startedAt:
              type: string
              format: date-time
              nullable: true
            finishedAt:
              type: string
              format: date-time
              nullable: true
            durationMs:
              type: integer
              nullable: true

    # Element type of `SearchStatusResponse.results` (described there as
    # organic SERP results). No property is required; `snippet` is the only
    # nullable field.
    SerpResult:
      type: object
      properties:
        rank:
          type: integer
        title:
          type: string
        # Plain string; no `format: uri` is declared.
        url:
          type: string
        snippet:
          type: string
          nullable: true

    # Listing of search batches: an object with a single `batches` array whose
    # items are declared inline. No property is required.
    SearchListResponse:
      type: object
      properties:
        batches:
          type: array
          items:
            type: object
            properties:
              id:
                type: integer
              name:
                type: string
              # Unconstrained string; no enum is declared for list entries.
              status:
                type: string
              job_type:
                type: string
                example: search
              total_results:
                type: integer
              num_extracted:
                type: integer
              query:
                type: string
              engine:
                type: string
              # Naming note: `createdAt` is camelCase while the next three
              # timing fields are snake_case (`started_at`, `finished_at`,
              # `duration_ms`), unlike the camelCase names used by
              # SearchStatusResponse.timestamps. These are wire names, so
              # renaming them would change the contract.
              createdAt:
                type: string
                format: date-time
              started_at:
                type: string
                format: date-time
                nullable: true
              finished_at:
                type: string
                format: date-time
                nullable: true
              duration_ms:
                type: integer
                nullable: true
