diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml index 4359ce2a..101b167a 100644 --- a/.github/workflows/build_docs.yml +++ b/.github/workflows/build_docs.yml @@ -6,9 +6,6 @@ on: branches: [main] tags: - "v*" - pull_request: - branches: - - "*" workflow_dispatch: inputs: version: @@ -47,17 +44,6 @@ jobs: - name: Make docs run: cd docs; make html - - name: Zip documentation - if: ${{ github.event_name == 'pull_request' }} - run: zip docs_artifact.zip docs/build/html -r - - - name: Upload artifact - if: ${{ github.event_name == 'pull_request' }} - uses: actions/upload-artifact@v6 - with: - name: docs_artifact - path: docs_artifact.zip - - name: Publish to Github Pages on main (dev) if: ${{ github.ref == 'refs/heads/main' }} uses: peaceiris/actions-gh-pages@v4 diff --git a/gwlearn/base.py b/gwlearn/base.py index 06776548..abda4e55 100644 --- a/gwlearn/base.py +++ b/gwlearn/base.py @@ -9,64 +9,23 @@ import numpy as np import pandas as pd from joblib import Parallel, delayed, dump, load -from libpysal import graph +from libpysal import graph, kernels from scipy.spatial import KDTree from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin from sklearn.model_selection import train_test_split __all__ = ["BaseClassifier", "BaseRegressor"] - -def _triangular(distances: np.ndarray, bandwidth: np.ndarray | float) -> np.ndarray: - u = np.clip(distances / bandwidth, 0, 1) - return 1 - u - - -def _parabolic(distances: np.ndarray, bandwidth: np.ndarray | float) -> np.ndarray: - u = np.clip(distances / bandwidth, 0, 1) - return 1 - u**2 - - -def _gaussian(distances: np.ndarray, bandwidth: np.ndarray | float) -> np.ndarray: - u = distances / bandwidth - return np.exp(-((u / 2) ** 2)) - - -def _bisquare(distances: np.ndarray, bandwidth: np.ndarray | float) -> np.ndarray: - u = np.clip(distances / bandwidth, 0, 1) - return (1 - u**2) ** 2 - - -def _cosine(distances: np.ndarray, bandwidth: np.ndarray | float) -> np.ndarray: - u = np.clip(distances / bandwidth, 0, 1) - return np.cos(np.pi / 2 * u) - - -def _exponential(distances: np.ndarray, bandwidth: np.ndarray | float) -> np.ndarray: - u = distances / bandwidth - return np.exp(-u) - - -def _boxcar(distances: np.ndarray, bandwidth: np.ndarray | float) -> np.ndarray: - r = (distances < bandwidth).astype(int) - return r - - -def _tricube(distances: np.ndarray, bandwidth: np.ndarray | float) -> np.ndarray: - u = np.clip(distances / bandwidth, 0, 1) - return (1 - u**3) ** 3 - - -_kernel_functions = { - "triangular": _triangular, - "parabolic": _parabolic, - # "gaussian": _gaussian, - "bisquare": _bisquare, - "tricube": _tricube, - "cosine": _cosine, - "boxcar": _boxcar, - # "exponential": _exponential, -} +_kernel_functions = ( + "triangular", + "parabolic", + # "gaussian", + "bisquare", + "tricube", + "cosine", + "boxcar", + # "exponential", +) class _BaseModel(BaseEstimator): @@ -140,7 +99,7 @@ def _build_weights(self) -> graph.Graph: if self.fixed: # fixed distance weights = graph.Graph.build_kernel( self.geometry, - kernel=_kernel_functions[self.kernel], + kernel=self.kernel, bandwidth=self.bandwidth, ) else: # adaptive KNN @@ -154,7 +113,7 @@ def _build_weights(self) -> graph.Graph: # the epsilon comes from MGWR to avoid division by zero bandwidth = weights._adjacency.groupby(level=0).transform("max") * 1.0000001 weights = graph.Graph( - adjacency=_kernel_functions[self.kernel](weights._adjacency, bandwidth), + adjacency=kernels.kernel(weights._adjacency, bandwidth, kernel=self.kernel, decay=True), is_sorted=True, ) if self.include_focal: @@ -417,11 +376,13 @@ def _prepare_prediction_neighborhoods( geometry, predicate="dwithin", distance=self.bandwidth ) local_ids = self._local_models.index[indices_array.flatten()].to_numpy() - distance = _kernel_functions[self.kernel]( + distance = kernels.kernel( self.geometry.iloc[indices_array].distance( geometry.iloc[input_ids], align=False ), bw, + kernel=self.kernel, + decay=True, ) else: training_coords = self.geometry.get_coordinates() @@ -440,7 +401,9 @@ def _prepare_prediction_neighborhoods( kernel_bandwidth = ( pd.Series(distances).groupby(input_ids).transform("max") + 1e-6 ) # can't have 0 - distance = _kernel_functions[self.kernel](distances, kernel_bandwidth) + distance = kernels.kernel( + distances, kernel_bandwidth, kernel=self.kernel, decay=True + ) split_indices = np.where(np.diff(input_ids))[0] + 1 local_model_ids = np.split(local_ids, split_indices) diff --git a/gwlearn/tests/test_linear_model.py b/gwlearn/tests/test_linear_model.py index eb390f8f..879539cb 100644 --- a/gwlearn/tests/test_linear_model.py +++ b/gwlearn/tests/test_linear_model.py @@ -70,17 +70,17 @@ def test_gwlogistic_fit_basic(sample_data): # noqa: F811 pd.testing.assert_series_equal( model.local_coef_.mean(), pd.Series( - [-0.0004301675501645129, -0.0620546230731815, 0.06715275989171457], + [-0.0004210459967078064, -0.06094184628894879, 0.06581659904328681], index=["Crm_prs", "Litercy", "Wealth"], ), check_exact=False, - atol=0.001, + atol=0.005, ) # Check structure of intercepts assert isinstance(model.local_intercept_, pd.Series) assert len(model.local_intercept_) == len(X) - assert pytest.approx(7.8, abs=0.1) == model.local_intercept_.mean() + assert pytest.approx(7.65, abs=0.1) == model.local_intercept_.mean() def test_gwlogistic_coefficients_structure(sample_data): # noqa: F811