-
Notifications
You must be signed in to change notification settings - Fork 19
ENH: implement pooled score feature #76
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
d1b52ce
300d65c
a326b43
5bd12c5
05356c7
aafd84f
0ae38b4
36fb0f2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -148,6 +148,10 @@ class GWRandomForestClassifier(BaseClassifier): | |||||
| Pooled out-of-bag (OOB) true labels across all fitted local models. | ||||||
| oob_pred_pooled_ : numpy.ndarray | ||||||
| Pooled out-of-bag (OOB) predictions/scores across all fitted local models. | ||||||
| oob_pooled_score_ : float | ||||||
| Accuracy computed from all out-of-bag predictions pooled together. | ||||||
| score_ : float | ||||||
| Alias for ``oob_pooled_score_``. | ||||||
|
|
||||||
| Examples | ||||||
| -------- | ||||||
|
|
@@ -297,6 +301,32 @@ def fit( | |||||
|
|
||||||
| return self | ||||||
|
|
||||||
| @property | ||||||
| def oob_pooled_score_(self) -> float: | ||||||
| """Accuracy on pooled out-of-bag predictions vs pooled OOB true labels. | ||||||
|
|
||||||
| Returns | ||||||
| ------- | ||||||
| float | ||||||
| Accuracy computed from all out-of-bag predictions pooled together. | ||||||
| """ | ||||||
| if self.oob_y_pooled_.size == 0 or self.oob_pred_pooled_.size == 0: | ||||||
| return float("nan") | ||||||
| y_true = self.oob_y_pooled_.ravel() | ||||||
| y_pred = self.oob_pred_pooled_.ravel() | ||||||
| return (y_true == y_pred).mean() | ||||||
|
|
||||||
| @property | ||||||
| def score_(self) -> float: | ||||||
| """Alias for oob_pooled_score_. | ||||||
|
|
||||||
| Returns | ||||||
| ------- | ||||||
| float | ||||||
| Accuracy computed from all out-of-bag predictions pooled together. | ||||||
| """ | ||||||
| return self.oob_pooled_score_ | ||||||
|
|
||||||
| def _get_score_data( | ||||||
| self, | ||||||
| local_model: BaseEstimator, | ||||||
|
|
@@ -646,6 +676,10 @@ class GWRandomForestRegressor(BaseRegressor): | |||||
| Pooled out-of-bag (OOB) true values across all fitted local models. | ||||||
| oob_pred_pooled_ : numpy.ndarray | ||||||
| Pooled out-of-bag (OOB) predictions across all fitted local models. | ||||||
| oob_pooled_score_ : float | ||||||
| R² computed from all out-of-bag predictions pooled together. | ||||||
| score_ : float | ||||||
| Alias for ``oob_pooled_score_``. | ||||||
|
|
||||||
| Examples | ||||||
| -------- | ||||||
|
|
@@ -789,6 +823,34 @@ def fit( | |||||
|
|
||||||
| return self | ||||||
|
|
||||||
| @property | ||||||
| def oob_pooled_score_(self) -> float: | ||||||
| """R² on pooled out-of-bag predictions vs pooled OOB true values. | ||||||
|
|
||||||
| Returns | ||||||
| ------- | ||||||
| float | ||||||
| R² computed from all out-of-bag predictions pooled together. | ||||||
| """ | ||||||
| if len(self.oob_y_pooled_) == 0: | ||||||
| return float("nan") | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. changed in latest commit. |
||||||
| y_true = self.oob_y_pooled_.ravel() | ||||||
| y_pred = self.oob_pred_pooled_.ravel() | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Do we need `.ravel()` here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. You're right, I'll remove .ravel() from oob_pooled_score and move it to _get_oob_score_data. |
||||||
| ss_res = ((y_true - y_pred) ** 2).sum() | ||||||
| ss_tot = ((y_true - y_true.mean()) ** 2).sum() | ||||||
| return 1 - ss_res / ss_tot if ss_tot != 0 else float("nan") | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. as above. |
||||||
|
|
||||||
| @property | ||||||
| def score_(self) -> float: | ||||||
| """Alias for oob_pooled_score_. | ||||||
|
|
||||||
| Returns | ||||||
| ------- | ||||||
| float | ||||||
| R² computed from all out-of-bag predictions pooled together. | ||||||
| """ | ||||||
| return self.oob_pooled_score_ | ||||||
|
|
||||||
| def _get_score_data( | ||||||
| self, | ||||||
| local_model: BaseEstimator, | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -138,6 +138,10 @@ class GWLogisticRegression(BaseClassifier): | |
| left_out_w_ : np.ndarray | ||
| Array of weights on left out observations in local models when | ||
| ``leave_out`` is set. | ||
| pooled_score_ : float | ||
| Accuracy computed from all local model predictions pooled together. | ||
| score_ : float | ||
| Alias for ``pooled_score_``. | ||
|
|
||
| Examples | ||
| -------- | ||
|
|
@@ -164,7 +168,6 @@ class GWLogisticRegression(BaseClassifier): | |
| dtype: boolean | ||
| """ | ||
|
|
||
| # TODO: score_ should be an alias of pooled_score_ - this is different from MGWR | ||
| def __init__( | ||
| self, | ||
| bandwidth: float | None = None, | ||
|
|
@@ -261,6 +264,30 @@ def fit(self, X: pd.DataFrame, y: pd.Series, geometry: gpd.GeoSeries | None = No | |
|
|
||
| return self | ||
|
|
||
| @property | ||
| def pooled_score_(self) -> float: | ||
| """Accuracy on pooled predictions vs pooled true labels. | ||
|
|
||
| Returns | ||
| ------- | ||
| float | ||
| Accuracy computed from all local model predictions pooled together. | ||
| """ | ||
| if self.y_pooled_.size == 0 or self.pred_pooled_.size == 0: | ||
| return float("nan") | ||
| return (self.y_pooled_ == self.pred_pooled_).mean() | ||
|
|
||
| @property | ||
| def score_(self) -> float: | ||
| """Alias for pooled_score_. | ||
|
|
||
| Returns | ||
| ------- | ||
| float | ||
| Accuracy computed from all local model predictions pooled together. | ||
| """ | ||
| return self.pooled_score_ | ||
|
|
||
| def _get_score_data( | ||
| self, | ||
| local_model: BaseEstimator, | ||
|
|
@@ -382,6 +409,10 @@ class GWLinearRegression(BaseRegressor): | |
| each location | ||
| local_intercept_ : pd.Series | ||
| Local intercept values at each location | ||
| pooled_score_ : float | ||
| R² computed from all local model predictions pooled together. | ||
| score_ : float | ||
| Alias for ``pooled_score_``. | ||
|
|
||
| Examples | ||
| -------- | ||
|
|
@@ -484,4 +515,34 @@ def fit(self, X: pd.DataFrame, y: pd.Series, geometry: gpd.GeoSeries | None = No | |
| [x[1] for x in self._score_data], index=self._names | ||
| ) | ||
|
|
||
| # Store pooled y for score computation | ||
| self.y_pooled_ = y.values | ||
| self.pred_pooled_ = self.pred_.values | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. This is wrong. This is not pooled y; this is focal y and focal prediction.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
|
|
||
| return self | ||
|
|
||
| @property | ||
| def pooled_score_(self) -> float: | ||
| """R² on pooled predictions vs pooled true values. | ||
|
|
||
| Returns | ||
| ------- | ||
| float | ||
| R² computed from all local model predictions pooled together. | ||
| """ | ||
| if len(self.y_pooled_) == 0: | ||
| return float("nan") | ||
| ss_res = ((self.y_pooled_ - self.pred_pooled_) ** 2).sum() | ||
| ss_tot = ((self.y_pooled_ - self.y_pooled_.mean()) ** 2).sum() | ||
| return 1 - ss_res / ss_tot if ss_tot != 0 else float("nan") | ||
|
|
||
| @property | ||
| def score_(self) -> float: | ||
| """Alias for pooled_score_. | ||
|
|
||
| Returns | ||
| ------- | ||
| float | ||
| R² computed from all local model predictions pooled together. | ||
| """ | ||
| return self.pooled_score_ | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
use
sklearn.metrics.accuracy_score, do not reimplement. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, you're right.. changed in latest commit.