Sources API¶

Detailed guide for managing sources in notebooks with the Python SDK.

Adding Sources¶

Add Web Sources¶

from nblm import NblmClient, GcloudTokenProvider, WebSource

client = NblmClient(
    token_provider=GcloudTokenProvider(),
    project_number="123456789012"
)

# Single web source
response = client.add_sources(
    notebook_id="abc123",
    web_sources=[
        WebSource(url="https://example.com", name="Example Website")
    ]
)

# Multiple web sources
response = client.add_sources(
    notebook_id="abc123",
    web_sources=[
        WebSource(url="https://docs.python.org", name="Python Docs"),
        WebSource(url="https://realpython.com"),  # name is optional
        WebSource(url="https://peps.python.org", name="Python PEPs")
    ]
)

Add Text Sources¶

from nblm import TextSource

response = client.add_sources(
    notebook_id="abc123",
    text_sources=[
        TextSource(content="Meeting notes from 2025-10-25", name="Meeting Notes"),
        TextSource(content="TODO: Review API changes", name="TODO")
    ]
)

Add Video Sources¶

from nblm import VideoSource

response = client.add_sources(
    notebook_id="abc123",
    video_sources=[
        VideoSource(url="https://www.youtube.com/watch?v=VIDEO_ID_1"),
        VideoSource(url="https://www.youtube.com/watch?v=VIDEO_ID_2")
    ]
)

Mix Multiple Source Types¶

from nblm import WebSource, TextSource, VideoSource

response = client.add_sources(
    notebook_id="abc123",
    web_sources=[
        WebSource(url="https://example.com", name="Article")
    ],
    text_sources=[
        TextSource(content="Summary of key points", name="Summary")
    ],
    video_sources=[
        VideoSource(url="https://www.youtube.com/watch?v=VIDEO_ID")
    ]
)

print(f"Added {len(response.sources)} sources")
for source in response.sources:
    print(f"  - {source['name']}")

Validation¶

The SDK validates inputs before making API calls:

from nblm import NblmError

# Empty text content raises error
try:
    client.add_sources(
        notebook_id="abc123",
        text_sources=[TextSource(content="", name="Empty")]
    )
except NblmError as e:
    print(f"Validation error: {e}")  # "text content cannot be empty"

# No sources provided raises error
try:
    client.add_sources(notebook_id="abc123")
except NblmError as e:
    print(f"Validation error: {e}")  # "at least one source must be provided"

Uploading Files¶

Basic Upload¶

response = client.upload_source_file(
    notebook_id="abc123",
    path="/path/to/document.pdf"
)

print(f"Uploaded: {response.source_id}")

With Custom Content Type¶

response = client.upload_source_file(
    notebook_id="abc123",
    path="/path/to/file.txt",
    content_type="text/plain"
)

With Display Name¶

response = client.upload_source_file(
    notebook_id="abc123",
    path="/path/to/research.pdf",
    display_name="Research Paper 2025"
)

Batch Upload¶

from pathlib import Path

files_dir = Path("/path/to/documents")
uploaded = []

for file_path in files_dir.glob("*.pdf"):
    response = client.upload_source_file(
        notebook_id="abc123",
        path=str(file_path)
    )
    uploaded.append(response.source_id)
    print(f"Uploaded: {file_path.name} -> {response.source_id}")

print(f"Total uploaded: {len(uploaded)}")

File Validation¶

The SDK validates files before upload:

from nblm import NblmError

# File not found
try:
    client.upload_source_file(
        notebook_id="abc123",
        path="/nonexistent/file.pdf"
    )
except NblmError as e:
    print(f"Error: {e}")  # "file not found"

# Not a file
try:
    client.upload_source_file(
        notebook_id="abc123",
        path="/path/to/directory"
    )
except NblmError as e:
    print(f"Error: {e}")  # "path is not a file"

# Empty file
try:
    client.upload_source_file(
        notebook_id="abc123",
        path="/path/to/empty.txt"
    )
except NblmError as e:
    print(f"Error: {e}")  # "cannot upload empty files"

Supported File Types¶

The API supports various file types including:

PDF (.pdf)
Text files (.txt)
Word documents (.docx)
And more

Content type is auto-detected from file extension if not specified.

Getting Source Details¶

Get a Specific Source¶

source = client.get_source(
    notebook_id="abc123",
    source_id="source-1"
)

print(f"Name: {source.name}")
print(f"Title: {source.title}")

if source.metadata:
    print(f"Word count: {source.metadata.word_count}")
    print(f"Added: {source.metadata.source_added_timestamp}")

if source.settings:
    print(f"Status: {source.settings.status}")

YouTube Metadata¶

source = client.get_source(notebook_id="abc123", source_id="video-source-1")

if source.metadata and source.metadata.youtube_metadata:
    yt = source.metadata.youtube_metadata
    print(f"Channel: {yt.channel_name}")
    print(f"Video ID: {yt.video_id}")

Deleting Sources¶

Delete Single Source¶

client.delete_sources(
    notebook_id="abc123",
    source_names=[
        "projects/123456789012/locations/global/notebooks/abc123/sources/source-1"
    ]
)

Delete Multiple Sources¶

source_names = [
    "projects/123/locations/global/notebooks/abc123/sources/source-1",
    "projects/123/locations/global/notebooks/abc123/sources/source-2",
    "projects/123/locations/global/notebooks/abc123/sources/source-3"
]

client.delete_sources(
    notebook_id="abc123",
    source_names=source_names
)

Get Source Names from Add Response¶

from nblm import WebSource

# Add sources
response = client.add_sources(
    notebook_id="abc123",
    web_sources=[
        WebSource(url="https://example.com"),
        WebSource(url="https://example.org")
    ]
)

# Extract source names
source_names = [source["name"] for source in response.sources]

# Delete them
client.delete_sources(
    notebook_id="abc123",
    source_names=source_names
)

Common Patterns¶

Create Notebook and Add Sources¶

from nblm import NblmClient, GcloudTokenProvider, WebSource, TextSource

client = NblmClient(
    token_provider=GcloudTokenProvider(),
    project_number="123456789012"
)

# Create notebook
notebook = client.create_notebook(title="Research: Python Best Practices")

# Add initial sources
client.add_sources(
    notebook_id=notebook.notebook_id,
    web_sources=[
        WebSource(url="https://peps.python.org/pep-0008/", name="PEP 8"),
        WebSource(url="https://docs.python-guide.org/", name="Python Guide")
    ],
    text_sources=[
        TextSource(
            content="Focus on code quality and readability",
            name="Project Goals"
        )
    ]
)

print(f"Notebook ready: {notebook.notebook_id}")

Incremental Source Addition¶

notebook_id = "abc123"

# Add web sources first
client.add_sources(
    notebook_id=notebook_id,
    web_sources=[WebSource(url="https://example.com")]
)

# Add text notes later
client.add_sources(
    notebook_id=notebook_id,
    text_sources=[TextSource(content="Notes", name="Notes")]
)

# Upload files separately
client.upload_source_file(
    notebook_id=notebook_id,
    path="document.pdf"
)

Adding Google Drive Documents¶

from nblm import GoogleDriveSource

notebook_id = "abc123"

# Authenticate with Drive-enabled credentials:
#   gcloud auth login --enable-gdrive-access
#   export NBLM_ACCESS_TOKEN=$(gcloud auth print-access-token)

client.add_sources(
    notebook_id=notebook_id,
    drive_sources=[
        GoogleDriveSource(
            document_id="FILE_ID",
            mime_type="application/vnd.google-apps.presentation",
            name="Team Update Slides",
        )
    ],
)

Google Drive File ID

The authenticated account must have view access to the Drive document. Use the Drive web UI to confirm you can open the file before ingesting it. FILE_ID can be extracted from the Drive URL at /d/<ID>/ (e.g., https://drive.google.com/file/d/<ID>/xxx).

Drive Access Validation

The SDK performs a token scope check (drive or drive.file) and verifies that the authenticated account can access the document. Failing either check raises an NblmError immediately, preventing partial uploads.

Bulk Upload from Directory¶

from pathlib import Path

def upload_directory(client: NblmClient, notebook_id: str, directory: str):
    """Upload all files from a directory to a notebook."""
    path = Path(directory)
    uploaded_count = 0

    for file_path in path.iterdir():
        if file_path.is_file():
            try:
                response = client.upload_source_file(
                    notebook_id=notebook_id,
                    path=str(file_path)
                )
                print(f"Uploaded: {file_path.name}")
                uploaded_count += 1
            except Exception as e:
                print(f"Failed to upload {file_path.name}: {e}")

    return uploaded_count

# Usage
count = upload_directory(client, "abc123", "/path/to/documents")
print(f"Uploaded {count} files")

Extract Source IDs¶

# Add sources and save IDs
response = client.add_sources(
    notebook_id="abc123",
    web_sources=[WebSource(url="https://example.com")]
)

# Extract source ID from full name
# Format: "projects/.../notebooks/.../sources/SOURCE_ID"
for source in response.sources:
    full_name = source["name"]
    source_id = full_name.split("/")[-1]
    print(f"Source ID: {source_id}")

Error Handling¶

Validation Errors¶

from nblm import NblmError

# Empty text validation
try:
    client.add_sources(
        notebook_id="abc123",
        text_sources=[TextSource(content="   ", name="Empty")]
    )
except NblmError as e:
    print(f"Validation failed: {e}")

# No sources provided
try:
    client.add_sources(notebook_id="abc123")
except NblmError as e:
    print(f"Validation failed: {e}")

API Errors¶

# Notebook not found
try:
    client.add_sources(
        notebook_id="nonexistent",
        web_sources=[WebSource(url="https://example.com")]
    )
except NblmError as e:
    print(f"API error: {e}")

# Invalid URL
try:
    client.add_sources(
        notebook_id="abc123",
        web_sources=[WebSource(url="not-a-valid-url")]
    )
except NblmError as e:
    print(f"API error: {e}")

Limitations¶

Source listing: No API method to list all sources in a notebook
Source updates: Cannot update existing sources, only add or delete

Next Steps¶

Notebooks API - Create and manage notebooks
Audio API - Create audio overviews
API Reference - Complete API reference
Error Handling - Exception handling