Merge pull request #57 from taylorwilsdon/google_pse

feat: Add Google Programmable Search Engine (PSE) Tool Server
This commit is contained in:
Tim Jaeryang Baek
2025-09-01 11:52:29 +04:00
committed by GitHub
5 changed files with 388 additions and 0 deletions

View File

@@ -0,0 +1,51 @@
# syntax=docker/dockerfile:1

# Image for the Google PSE tool server (FastAPI app served by uvicorn).
# Dockerfile reference guide: https://docs.docker.com/go/dockerfile-reference/

ARG PYTHON_VERSION=3.10.12
FROM python:${PYTHON_VERSION}-slim AS base

# Prevents Python from writing pyc files.
ENV PYTHONDONTWRITEBYTECODE=1

# Keeps Python from buffering stdout and stderr to avoid situations where
# the application crashes without emitting any logs due to buffering.
ENV PYTHONUNBUFFERED=1

WORKDIR /app

# Create a non-privileged user that the app will run under.
# See https://docs.docker.com/go/dockerfile-user-best-practices/
ARG UID=10001
RUN adduser \
    --disabled-password \
    --gecos "" \
    --home "/nonexistent" \
    --shell "/sbin/nologin" \
    --no-create-home \
    --uid "${UID}" \
    appuser

# Download dependencies as a separate step to take advantage of Docker's caching.
# Leverage a cache mount to /root/.cache/pip to speed up subsequent builds.
# Leverage a bind mount to requirements.txt to avoid having to copy it into
# this layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=bind,source=requirements.txt,target=requirements.txt \
    python -m pip install -r requirements.txt

# Switch to the non-privileged user to run the application.
USER appuser

# Copy the source code into the container.
COPY . .

# Expose the port that the application listens on.
EXPOSE 8000

# Run the application. Exec (JSON-array) form makes uvicorn PID 1 so it
# receives SIGTERM from `docker stop` directly instead of sitting behind
# an intermediate `/bin/sh -c` process.
CMD ["uvicorn", "main:app", "--host=0.0.0.0", "--port=8000"]

View File

@@ -0,0 +1,154 @@
# 🔍 Google PSE Tool Server
A sleek and simple FastAPI-based server to provide web search functionality using Google's Programmable Search Engine (PSE) via the Custom Search JSON API.
📦 Built with:
⚡️ FastAPI • 📜 OpenAPI • 🧰 Python • 🔍 Google Custom Search API
---
## 🚀 Quickstart
Clone the repo and get started in seconds:
```bash
git clone https://github.com/open-webui/openapi-servers
cd openapi-servers/servers/google-pse
# Install dependencies
pip install -r requirements.txt
# Set up environment variables
export GOOGLE_API_KEY="your_google_api_key_here"
export GOOGLE_PSE_CX="your_search_engine_id_here"
# Run the server
uvicorn main:app --host 0.0.0.0 --reload
```
---
## 🔧 Configuration
### Required Environment Variables
- `GOOGLE_API_KEY`: Your Google API key with Custom Search API enabled
- `GOOGLE_PSE_CX`: Your Programmable Search Engine ID
### Getting Your Credentials
1. **Google API Key**:
   - Go to the [Google Cloud Console](https://console.cloud.google.com/)
   - Create a new project or select an existing one
   - Enable the Custom Search JSON API
   - Create credentials (API Key)
2. **Programmable Search Engine ID**:
   - Visit [Google Programmable Search Engine](https://programmablesearchengine.google.com/)
   - Create a new search engine
   - Copy the Search Engine ID (cx parameter)
---
## 🔍 About
This server is part of the OpenAPI Tools Collection. Use it to perform web searches programmatically, retrieve search results with metadata, and integrate search functionality into your applications — all wrapped in a developer-friendly OpenAPI interface.
### Features
- 🌐 Web search using Google's Custom Search JSON API
- 🎯 Advanced search parameters (language, country, date restrictions, etc.)
- 📊 Rich metadata including search timing and result counts
- 🔒 Configurable safe search filtering
- 📄 Support for file type and site-specific searches
- 🚀 Fast and reliable RESTful API
Compatible with any OpenAPI-supported ecosystem, including:
- 🌀 FastAPI
- 📘 Swagger UI
- 🧪 API testing tools
- 🤖 AI agents and LLMs
---
## 📡 API Endpoints
### `GET /search`
Perform a web search with comprehensive parameters:
**Required Parameters:**
- `q`: Search query string
**Optional Parameters:**
- `num`: Number of results (1-10, default: 10)
- `start`: Starting index for results (default: 1)
- `safe`: Safe search level ('active' or 'off', default: 'off')
- `lr`: Language restriction (e.g., 'lang_en')
- `cr`: Country restriction (e.g., 'countryUS')
- `dateRestrict`: Date restriction ('d1', 'w1', 'm1', 'y1')
- `exactTerms`: Phrase that must appear in all results
- `excludeTerms`: Terms to exclude from results
- `fileType`: File type filter ('pdf', 'doc', etc.)
- `siteSearch`: Specific site to search
- `siteSearchFilter`: Include ('i') or exclude ('e') site
### `GET /health`
Health check endpoint to verify service status.
---
## 🚧 Customization
Extend the search functionality, add custom filters, or integrate with other APIs. Perfect for:
- 🤖 AI agent web research capabilities
- 📊 Automated content discovery
- 🔍 Custom search applications
- 📈 SEO and market research tools
---
## 🌐 API Documentation
Once running, explore auto-generated interactive docs:
🖥️ Swagger UI: http://localhost:8000/docs
📄 OpenAPI JSON: http://localhost:8000/openapi.json
---
## 🐳 Docker Support
Run with Docker Compose:
```bash
# Set environment variables in your shell or .env file
export GOOGLE_API_KEY="your_api_key"
export GOOGLE_PSE_CX="your_search_engine_id"
# Start the service
docker-compose up --build
```
---
## 📝 Example Usage
```bash
# Basic search
curl "http://localhost:8000/search?q=OpenAI+GPT"
# Advanced search with filters
curl "http://localhost:8000/search?q=machine+learning&num=5&lr=lang_en&dateRestrict=m1"
# Site-specific search
curl "http://localhost:8000/search?q=python+tutorial&siteSearch=github.com&siteSearchFilter=i"
```
---
Made with ❤️ by the Open WebUI community 🌍
Explore more tools ➡️ https://github.com/open-webui/openapi-servers

View File

@@ -0,0 +1,6 @@
services:
  server:
    build:
      context: .
    ports:
      # Quoted so YAML always treats the HOST:CONTAINER mapping as a string.
      - "8000:8000"
    # Forward the credentials from the invoking shell (or a .env file next to
    # this compose file) into the container. Without this section the
    # variables never reach the application process. An unset variable is
    # passed as an empty string.
    environment:
      GOOGLE_API_KEY: ${GOOGLE_API_KEY:-}
      GOOGLE_PSE_CX: ${GOOGLE_PSE_CX:-}

172
servers/google-pse/main.py Normal file
View File

@@ -0,0 +1,172 @@
import os
import requests
from fastapi import FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from typing import Optional, List
# ASGI application object; the metadata below is what appears in the
# generated OpenAPI document.
app = FastAPI(
    description="Provides web search functionality using Google's Custom Search JSON API.",
    version="1.0.0",
    title="Google Programmable Search Engine API",
)

# CORS policy: every origin, method and header is accepted.
# NOTE(review): a wildcard origin together with allow_credentials=True is very
# permissive -- consider restricting `origins` for production deployments.
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=origins,
)
# -------------------------------
# Pydantic models
# -------------------------------
class SearchItem(BaseModel):
    # One entry of the `items` array in a Custom Search response.
    # (Documented with comments rather than a docstring so the generated
    # OpenAPI schema description is not altered.)
    title: str = Field(..., description="The title of the search result")
    link: str = Field(..., description="The URL of the search result")
    # Optional: Google does not always include a snippet for a result. With a
    # required field, a single snippet-less item would make validation of the
    # whole response fail.
    snippet: Optional[str] = Field(None, description="A snippet of text from the search result")
    displayLink: Optional[str] = Field(None, description="The display URL")
    formattedUrl: Optional[str] = Field(None, description="The formatted URL")
class SearchInformation(BaseModel):
    # Search metadata: timing and result counts. All four fields are required.
    # The count fields are declared as strings, not integers.
    searchTime: float = Field(
        default=...,
        description="The time taken to perform the search",
    )
    formattedSearchTime: str = Field(
        default=...,
        description="The formatted search time",
    )
    totalResults: str = Field(
        default=...,
        description="The total number of search results",
    )
    formattedTotalResults: str = Field(
        default=...,
        description="The formatted total results",
    )
class Queries(BaseModel):
    # Query descriptors for the current, next and previous result pages.
    # Every field is optional and defaults to None.
    request: Optional[List[dict]] = Field(
        default=None,
        description="The request query parameters",
    )
    nextPage: Optional[List[dict]] = Field(
        default=None,
        description="Parameters for the next page",
    )
    previousPage: Optional[List[dict]] = Field(
        default=None,
        description="Parameters for the previous page",
    )
class Context(BaseModel):
    # Metadata about the search engine that served the request.
    title: str = Field(
        default=...,
        description="The title of the custom search engine",
    )
class SearchResponse(BaseModel):
    # Top-level shape returned by GET /search. Everything except `items` is
    # required; `items` may be absent and then defaults to None.
    kind: str = Field(
        default=...,
        description="The kind of search response",
    )
    url: dict = Field(
        default=...,
        description="URL information",
    )
    queries: Queries = Field(
        default=...,
        description="Query information",
    )
    context: Context = Field(
        default=...,
        description="Search engine context",
    )
    searchInformation: SearchInformation = Field(
        default=...,
        description="Information about the search",
    )
    items: Optional[List[SearchItem]] = Field(
        default=None,
        description="The search results",
    )
# -------------------------------
# Routes
# -------------------------------
GOOGLE_SEARCH_URL = "https://customsearch.googleapis.com/customsearch/v1"
# Seconds to wait for Google before giving up. Without a timeout a stalled
# connection would block the worker handling the request indefinitely.
_GOOGLE_REQUEST_TIMEOUT_SECONDS = 10


def _google_error_to_http_exception(payload, fallback_status):
    """Build an HTTPException from a Google error payload.

    Uses the ``code`` and ``message`` of ``payload["error"]`` when present.
    ``fallback_status`` is used when the payload carries no usable HTTP
    error status (missing, non-integer, or outside 400-599). The request URL
    is never included in the detail because it contains the API key.
    """
    error_info = payload.get("error") if isinstance(payload, dict) else None
    if not isinstance(error_info, dict):
        error_info = {}
    status = error_info.get("code", fallback_status)
    if isinstance(status, bool) or not isinstance(status, int) or not 400 <= status <= 599:
        status = fallback_status
    return HTTPException(
        status_code=status,
        detail=f"Google API Error: {error_info.get('message', 'Unknown error')}",
    )


# Error mapping of the /search route:
#   400 - API key or search engine ID missing (neither parameter nor env var)
#   503 - Google could not be reached (connection error, timeout, ...)
#   Google's own status code - Google answered with an error payload
#   500 - Google's answer is not JSON or lacks a required top-level field
@app.get("/search", response_model=SearchResponse, summary="Perform a web search")
def search_web(
    q: str = Query(..., description="The search query"),
    cx: Optional[str] = Query(None, description="The Programmable Search Engine ID (can be set via GOOGLE_PSE_CX env var)"),
    api_key: Optional[str] = Query(None, description="Google API key (can be set via GOOGLE_API_KEY env var)"),
    num: Optional[int] = Query(10, ge=1, le=10, description="Number of search results to return (1-10)"),
    start: Optional[int] = Query(1, ge=1, le=91, description="The index of the first result to return"),
    safe: Optional[str] = Query("off", description="Search safety level: 'active' or 'off'"),
    lr: Optional[str] = Query(None, description="Language restriction (e.g., 'lang_en')"),
    cr: Optional[str] = Query(None, description="Country restriction (e.g., 'countryUS')"),
    dateRestrict: Optional[str] = Query(None, description="Date restriction (e.g., 'd1', 'w1', 'm1', 'y1')"),
    exactTerms: Optional[str] = Query(None, description="Phrase that all results must contain"),
    excludeTerms: Optional[str] = Query(None, description="Terms to exclude from results"),
    fileType: Optional[str] = Query(None, description="File type restriction (e.g., 'pdf', 'doc')"),
    siteSearch: Optional[str] = Query(None, description="Site to search within or exclude"),
    siteSearchFilter: Optional[str] = Query(None, description="Include ('i') or exclude ('e') siteSearch")
):
    """
    Performs a web search using Google's Custom Search JSON API.
    The API key and search engine ID can be provided as query parameters or set as environment variables:
    - GOOGLE_API_KEY: Your Google API key
    - GOOGLE_PSE_CX: Your Programmable Search Engine ID
    Returns search results in JSON format with metadata about the search.
    """
    # Explicit query parameters win over the environment configuration.
    google_api_key = api_key or os.getenv("GOOGLE_API_KEY")
    search_engine_id = cx or os.getenv("GOOGLE_PSE_CX")
    if not google_api_key:
        raise HTTPException(
            status_code=400,
            detail="Google API key is required. Provide it as 'api_key' parameter or set GOOGLE_API_KEY environment variable."
        )
    if not search_engine_id:
        raise HTTPException(
            status_code=400,
            detail="Programmable Search Engine ID is required. Provide it as 'cx' parameter or set GOOGLE_PSE_CX environment variable."
        )

    # Always-sent parameters.
    params = {
        "key": google_api_key,
        "cx": search_engine_id,
        "q": q,
        "num": num,
        "start": start,
        "safe": safe
    }
    # Filters are only forwarded when the caller supplied them.
    optional_params = {
        "lr": lr,
        "cr": cr,
        "dateRestrict": dateRestrict,
        "exactTerms": exactTerms,
        "excludeTerms": excludeTerms,
        "fileType": fileType,
        "siteSearch": siteSearch,
        "siteSearchFilter": siteSearchFilter
    }
    params.update({name: value for name, value in optional_params.items() if value is not None})

    try:
        response = requests.get(
            GOOGLE_SEARCH_URL,
            params=params,
            timeout=_GOOGLE_REQUEST_TIMEOUT_SECONDS,
        )
    except requests.exceptions.RequestException as e:
        # Only the exception type is reported: the exception text can embed
        # the full request URL, which carries the API key in its query string.
        raise HTTPException(
            status_code=503,
            detail=f"Error connecting to Google Custom Search API: {type(e).__name__}"
        )

    # Parse the body before looking at the status so that Google's own error
    # message can be reported for non-2xx answers as well.
    try:
        data = response.json()
    except ValueError:
        data = None

    if not response.ok:
        raise _google_error_to_http_exception(data, response.status_code)
    if isinstance(data, dict) and "error" in data:
        raise _google_error_to_http_exception(data, 400)

    if not isinstance(data, dict):
        raise HTTPException(
            status_code=500,
            detail="Unexpected response format from Google API: body is not a JSON object"
        )
    # Validate required fields are present
    required_fields = ["kind", "url", "queries", "context", "searchInformation"]
    for field in required_fields:
        if field not in data:
            raise HTTPException(
                status_code=500,
                detail=f"Unexpected response format from Google API: missing '{field}'"
            )
    return data
@app.get("/health", summary="Health check endpoint")
def health_check():
    """
    Simple health check endpoint to verify the service is running.
    """
    # Constant payload: no upstream call and no configuration lookup happens.
    status_report = dict(status="healthy", service="google-pse")
    return status_report

View File

@@ -0,0 +1,5 @@
fastapi
uvicorn[standard]
pydantic
python-multipart
requests