Clustering Example
This example shows how to perform spatial clustering on Points of Interest.
from verus.clustering import GeOPTICS, KMeansHaversine
import pandas as pd
# Load POI data
poi_data = pd.read_csv("../../data/poti/Porto_dataset_buffered.csv")
# Run OPTICS clustering to obtain initial centers
optics = GeOPTICS(
min_samples=5,
xi=0.05,
min_cluster_size=5,
verbose=True
)
optics_results = optics.run(data_source=poi_data)
# Use OPTICS centers to initialize KMeans
if optics_results["centroids"] is not None and len(optics_results["centroids"]) > 1:
centers = optics_results["centroids"]
print(f"Running KMeans with {len(centers)} OPTICS centers")
kmeans = KMeansHaversine(
n_clusters=len(centers),
init="predefined",
random_state=42,
predefined_centers=centers
)
kmeans_results = kmeans.run(
data_source=poi_data,
centers_input=centers
)
# Access clustering results
clusters = kmeans_results["clusters"]
centroids = kmeans_results["centroids"]
print(f"Found {len(centroids)} clusters")
print(f"Cluster distribution:\n{clusters['cluster'].value_counts()}")
Follow this example in the project’s notebooks folder.