feat: Fix graph visualization to work with latest spanner-graph-notebook code; also, allow visualization when only some columns are json. by ericfe-google · Pull Request #102 · googleapis/python-bigquery-magics · GitHub
Skip to content
This repository was archived by the owner on May 7, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
308faf8
Multi-column support
ericfe-google Mar 11, 2025
22c9e8f
Remove 'rows' field in results, as it's not used by the Javascript. A…
ericfe-google Mar 11, 2025
cbffa75
reformat
ericfe-google Mar 11, 2025
43eb06f
Fix test_bigquery.py tests, remove unnecessary mocking of GraphServer
ericfe-google Mar 11, 2025
46dbcc1
reformat
ericfe-google Mar 11, 2025
57e22c0
Get basic graph visualization working against latest spanner code
ericfe-google Mar 11, 2025
36e0f34
Fix unit tests
ericfe-google Mar 11, 2025
469583e
Ignore columns we don't know how to visualize for visualization purpo…
ericfe-google Mar 11, 2025
e5d4ac0
Merge branch 'main' into graph3
ericfe-google Mar 12, 2025
6223652
reformat
ericfe-google Mar 12, 2025
c929e33
Remove unused dependency on networkx
ericfe-google Mar 12, 2025
3cdf1ac
Implement stub callback for node expansion
ericfe-google Mar 12, 2025
ba92a07
Fix test_bigquery_graph_missing_spanner_deps.
ericfe-google Mar 12, 2025
176c854
reformat
ericfe-google Mar 12, 2025
2d899b4
Add unit test for GraphServerHandler::handler_post_node_expansion()
ericfe-google Mar 12, 2025
3b4903f
Add test for invalid node expansion request
ericfe-google Mar 12, 2025
49d1aac
reformat
ericfe-google Mar 12, 2025
9b4a567
Tweaks to improve code coverage
ericfe-google Mar 12, 2025
502149a
More tweaks to improve code coverage
ericfe-google Mar 12, 2025
148a74c
avoid list comprehension due to code coverage tooling
ericfe-google Mar 12, 2025
ce96f22
Fix visualization in colab.
ericfe-google Mar 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 41 additions & 17 deletions bigquery_magics/bigquery.py
85 changes: 50 additions & 35 deletions bigquery_magics/graph_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
from typing import Dict, List


def execute_node_expansion(params, request):
    """Return the placeholder response for a node expansion request.

    Node expansion is not implemented here: both arguments are accepted
    but ignored, and the same error payload is returned on every call.

    Args:
        params: Unused.
        request: Unused.

    Returns:
        A dict with a single "error" key holding the explanatory message.
    """
    placeholder = dict(error="Node expansion not yet implemented")
    return placeholder


def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
"""
Converts graph data to the form expected by the visualization framework.
Expand Down Expand Up @@ -49,16 +53,12 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
# does not even get called unless spanner_graphs has already been confirmed
# to exist upstream.
from google.cloud.spanner_v1.types import StructType, Type, TypeCode
import networkx
from spanner_graphs.conversion import (
columns_to_native_numpy,
prepare_data_for_graphing,
)
from spanner_graphs.conversion import get_nodes_edges

try:
fields: List[StructType.Field] = []
data = {}
rows = []
tabular_data = {}
for key, value in query_results.items():
column_name = None
column_value = None
Expand All @@ -73,45 +73,39 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
StructType.Field(name=column_name, type=Type(code=TypeCode.JSON))
)
data[column_name] = []
tabular_data[column_name] = []
for value_key, value_value in column_value.items():
if not isinstance(value_key, str):
raise ValueError(
f"Expected inner key to be str, got {type(value_key)}"
)
if not isinstance(value_value, str):
raise ValueError(
f"Expected inner value to be str, got {type(value_value)}"
)
row_json = json.loads(value_value)

if row_json is not None:
try:
row_json = json.loads(value_value)
data[column_name].append(row_json)
rows.append([row_json])

d, ignored_columns = columns_to_native_numpy(data, fields)

graph: networkx.classes.DiGraph = prepare_data_for_graphing(
incoming=d, schema_json=None
)

nodes = []
for node_id, node in graph.nodes(data=True):
nodes.append(node)

edges = []
for from_id, to_id, edge in graph.edges(data=True):
edges.append(edge)
tabular_data[column_name].append(row_json)
except (ValueError, TypeError):
# Non-JSON columns cannot be visualized, but we still want them
# in the tabular view.
tabular_data[column_name].append(str(value_value))

nodes, edges = get_nodes_edges(data, fields, schema_json=None)

# Convert nodes and edges to json objects.
# (Unfortunately, the code coverage tooling does not allow this
# to be expressed as a list comprehension.)
nodes_json = []
for node in nodes:
nodes_json.append(node.to_json())
edges_json = []
for edge in edges:
edges_json.append(edge.to_json())

return {
"response": {
# These fields populate the graph result view.
"nodes": nodes,
"edges": edges,
"nodes": nodes_json,
"edges": edges_json,
# This field would populate the visualizer's schema view, but it is not yet
# implemented on the BigQuery side.
"schema": None,
# This field is used to populate the visualizer's tabular view.
"query_result": data,
"query_result": tabular_data,
}
}
except Exception as e:
Expand All @@ -133,6 +127,7 @@ class GraphServer:
endpoints = {
"get_ping": "/get_ping",
"post_ping": "/post_ping",
"post_node_expansion": "/post_node_expansion",
"post_query": "/post_query",
}

Expand Down Expand Up @@ -228,13 +223,33 @@ def handle_post_query(self):
response = convert_graph_data(query_results=json.loads(data["params"]))
self.do_data_response(response)

def handle_post_node_expansion(self):
    """Respond to a POST request asking for a node to be expanded.

    The parsed POST payload is expected to carry:
    - params: A JSON string containing connection parameters (project, instance, database, graph)
    - request: A dictionary with node details (uid, node_labels, node_properties, direction, edge_label)

    Both entries are looked up with ``.get``, so a missing key is forwarded
    to ``execute_node_expansion`` as ``None``. Whatever that call returns is
    sent back through ``do_data_response``.
    """
    payload = self.parse_post_data()

    # Pull the two expected entries out of the payload before delegating.
    expansion_params = payload.get("params")
    expansion_request = payload.get("request")

    result = execute_node_expansion(
        params=expansion_params, request=expansion_request
    )
    self.do_data_response(result)

def do_GET(self):
    """Serve a GET request; the only path accepted is the get_ping endpoint."""
    ping_path = GraphServer.endpoints["get_ping"]
    # Any other path trips the assertion, as in the original handler.
    assert self.path == ping_path
    self.handle_get_ping()

def do_POST(self):
    """Route a POST request to the handler registered for its path.

    Ping and node-expansion paths are matched first; any remaining request
    is asserted to target the post_query endpoint and handled as a query.
    """
    routes = GraphServer.endpoints
    requested = self.path

    if requested == routes["post_ping"]:
        self.handle_post_ping()
        return
    if requested == routes["post_node_expansion"]:
        self.handle_post_node_expansion()
        return

    # Fallthrough: the only remaining supported endpoint is post_query.
    assert requested == routes["post_query"]
    self.handle_post_query()
Expand Down
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,7 @@
"bigframes": ["bigframes >= 1.17.0"],
"geopandas": ["geopandas >= 1.0.1"],
"spanner-graph-notebook": [
"spanner-graph-notebook >= 1.1.1, <=1.1.1",
"networkx",
"spanner-graph-notebook >= 1.1.3",

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we put an upper bound on this? Or are all the APIs we're using now pretty stable?

I'd be OK with adding an upper bound only if/when it becomes a problem. Ideally, we'd install the spanner-graph-notebook package from its GitHub repo in the prerelease_deps session (https://github.com/googleapis/python-bigquery-magics/blob/main/noxfile.py#L431), similar to what we do in python-bigquery-pandas: https://github.com/googleapis/python-bigquery-pandas/blob/7bac1c7439dcdce09edac6461a5127321451daee/noxfile.py#L345-L351

"portpicker",
],
}
Expand Down
37 changes: 34 additions & 3 deletions tests/unit/test_bigquery.py
Loading