Enable pygrep-hooks #7507

Open · wants to merge 4 commits into main (showing changes from all commits)
15 changes: 14 additions & 1 deletion .pre-commit-config.yaml
@@ -16,6 +16,19 @@ repos:
- id: requirements-txt-fixer
exclude: ^requirements-dev\.txt$
- id: trailing-whitespace
+  - repo: https://github.com/pre-commit/pygrep-hooks
+    rev: v1.10.0
+    hooks:
+      - id: python-check-blanket-noqa
+      - id: python-check-blanket-type-ignore
+      - id: python-check-mock-methods
+      # - id: python-no-eval # gets confused with all the `.eval()`
+      - id: python-no-log-warn
+      - id: python-use-type-annotations
+      - id: rst-backticks
+      - id: rst-directive-colons
+      - id: rst-inline-touching-normal
+      - id: text-unicode-replacement-char
- repo: https://github.com/citation-file-format/cffconvert
rev: 054bda51dbe278b3e86f27c890e3f3ac877d616c
hooks:
@@ -39,7 +52,7 @@ repos:
rev: v0.6.5
hooks:
- id: ruff
-      args: ["--fix", "--output-format=full"]
+      args: [--fix, --show-fixes]
- id: ruff-format
- repo: https://github.com/MarcoGorelli/madforhooks
rev: 0.4.1
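For context on what the newly enabled hooks enforce: pygrep-hooks are simple regex checks. The python-check-blanket-* hooks reject suppressions that silence every error at once, python-no-log-warn flags the deprecated Logger.warn alias, and rst-backticks flags single backticks in reST, where `x` is interpreted text rather than the inline literal ``x``. A minimal illustration, not taken from this PR (the logger name here is hypothetical):

    import logging

    logger = logging.getLogger(__name__)  # hypothetical module logger

    # python-check-blanket-noqa: a bare "# noqa" silences every lint rule;
    # naming a code keeps the suppression narrow.
    import os  # noqa: F401

    # python-check-blanket-type-ignore: same idea for mypy; a bare
    # "# type: ignore" would also hide future, unrelated errors on this line.
    x: int = "oops"  # type: ignore[assignment]

    # python-no-log-warn: Logger.warn is a deprecated alias of Logger.warning.
    logger.warning("prefer .warning() over .warn()")

This is why the bulk of the diff below swaps bare ignores for coded ones and single backticks for double backticks.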
4 changes: 2 additions & 2 deletions docs/source/api.rst
@@ -41,10 +41,10 @@ Plots, stats and diagnostics are delegated to the
library, a general purpose library for
"exploratory analysis of Bayesian models".

-* Functions from the `arviz.plots` module are available through ``pymc.<function>`` or ``pymc.plots.<function>``,
+* Functions from the ``arviz.plots`` module are available through ``pymc.<function>`` or ``pymc.plots.<function>``,
but for their API documentation please refer to the :ref:`ArviZ documentation <arviz:plot_api>`.

-* Functions from the `arviz.stats` module are available through ``pymc.<function>`` or ``pymc.stats.<function>``,
+* Functions from the ``arviz.stats`` module are available through ``pymc.<function>`` or ``pymc.stats.<function>``,
but for their API documentation please refer to the :ref:`ArviZ documentation <arviz:stats_api>`.

ArviZ is a dependency of PyMC and so, in addition to the locations described above,
2 changes: 1 addition & 1 deletion docs/source/api/distributions/discrete.rst
@@ -23,4 +23,4 @@ Discrete
.. note::

**OrderedLogistic and OrderedProbit:**
-   The `OrderedLogistic` and `OrderedProbit` distributions expect the observed values to be 0-based, i.e., they should range from `0` to `K-1`. Using 1-based indexing (like `1, 2, 3,...K`) can result in errors.
+   The ``OrderedLogistic`` and ``OrderedProbit`` distributions expect the observed values to be 0-based, i.e., they should range from ``0`` to ``K-1``. Using 1-based indexing (like ``1, 2, 3,...K``) can result in errors.
4 changes: 2 additions & 2 deletions docs/source/api/shape_utils.rst
@@ -4,9 +4,9 @@ shape_utils

This submodule contains various functions that apply numpy's broadcasting rules to shape tuples, and also to samples drawn from probability distributions.

-The main challenge when broadcasting samples drawn from a generative model, is that each random variate has a core shape. When we draw many i.i.d samples from a given RV, for example if we ask for `size_tuple` i.i.d draws, the result usually is a `size_tuple + RV_core_shape`. In the generative model's hierarchy, the downstream RVs that are conditionally dependent on our above sampled values, will get an array with a shape that is inconsistent with the core shape they expect to see for their parameters. This is a problem sometimes because it prevents regular broadcasting in complex hierarchical models, and thus make prior and posterior predictive sampling difficult.
+The main challenge when broadcasting samples drawn from a generative model, is that each random variate has a core shape. When we draw many i.i.d samples from a given RV, for example if we ask for ``size_tuple`` i.i.d draws, the result usually is a ``size_tuple + RV_core_shape``. In the generative model's hierarchy, the downstream RVs that are conditionally dependent on our above sampled values, will get an array with a shape that is inconsistent with the core shape they expect to see for their parameters. This is a problem sometimes because it prevents regular broadcasting in complex hierarchical models, and thus make prior and posterior predictive sampling difficult.

-This module introduces functions that are made aware of the requested `size_tuple` of i.i.d samples, and does the broadcasting on the core shapes, transparently ignoring or moving the i.i.d `size_tuple` prepended axes around.
+This module introduces functions that are made aware of the requested ``size_tuple`` of i.i.d samples, and does the broadcasting on the core shapes, transparently ignoring or moving the i.i.d ``size_tuple`` prepended axes around.

.. currentmodule:: pymc.distributions.shape_utils

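The ``size_tuple + RV_core_shape`` behavior described above is easy to see directly; a minimal sketch using only public PyMC API (not code from this diff):

    import numpy as np
    import pymc as pm

    # A 3-dimensional multivariate normal has core shape (3,).
    rv = pm.MvNormal.dist(mu=np.zeros(3), cov=np.eye(3))

    # Requesting 5 i.i.d. draws prepends the size tuple to the core shape:
    print(pm.draw(rv, draws=5).shape)  # (5, 3) == size_tuple + RV_core_shape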
8 changes: 4 additions & 4 deletions docs/source/guides/Gaussian_Processes.rst
@@ -126,7 +126,7 @@ variable models and also some fast approximations. Their usage all follows a
similar pattern: First, a GP is instantiated with a mean function and a
covariance function. Then, GP objects can be added together, allowing for
function characteristics to be carefully modeled and separated. Finally, one
-of `prior`, `marginal_likelihood` or `conditional` methods is called on the GP
+of ``prior``, ``marginal_likelihood`` or ``conditional`` methods is called on the GP
object to actually construct the PyMC random variable that represents the
function prior.

@@ -148,7 +148,7 @@ conditioned on.
or other, depending on the implementation. See the notebooks for examples.
The :code:`conditional` method works similarly.

-Calling the `prior` method will create a PyMC random variable that represents
+Calling the ``prior`` method will create a PyMC random variable that represents
the latent function :math:`f(x) = \mathbf{f}`::

f = gp.prior("f", X)
@@ -218,7 +218,7 @@ thesis <https://www.cs.toronto.edu/~duvenaud/thesis.pdf>`_.

The GP objects in PyMC keep track of these marginals automatically. The
following code sketch shows how to define the conditional distribution of
-:math:`f_2^*`. We use `gp.Marginal` in the example, but the same works for
+:math:`f_2^*`. We use ``gp.Marginal`` in the example, but the same works for
other implementations. The first block fits the GP prior. We denote
:math:`f_1 + f_2` as just :math:`f` for brevity::

@@ -255,7 +255,7 @@ arguments are required for conditionals of :math:`f1` and :math:`f2`, but not

.. note::
When constructing conditionals, the additional arguments :code:`X`, :code:`y`,
-   :code:`noise` and :code:`gp` must be provided as a dict called `given`!
+   :code:`noise` and :code:`gp` must be provided as a dict called ``given``!

Since the marginal likelihood method of :code:`gp1` or :code:`gp2` wasn't called,
their conditionals need to be provided with the required inputs. In the same
10 changes: 5 additions & 5 deletions docs/source/learn/core_notebooks/Gaussian_Processes.rst
@@ -123,8 +123,8 @@ variable models and also some fast approximations. Their usage all follows a
similar pattern: First, a GP is instantiated with a mean function and a
covariance function. Then, GP objects can be added together, allowing for
function characteristics to be carefully modeled and separated. Finally, one
-of `prior`, `marginal_likelihood` or `conditional` methods is called on the GP
-object to actually construct the PyMC random variable that represents the
+of ``prior``, ``marginal_likelihood`` or ``conditional`` methods is called on
+the GP object to actually construct the PyMC random variable that represents the
function prior.

Using :code:`gp.Latent` for the example, the syntax to first specify the GP
@@ -145,7 +145,7 @@ conditioned on.
or other, depending on the implementation. See the notebooks for examples.
The :code:`conditional` method works similarly.

-Calling the `prior` method will create a PyMC random variable that represents
+Calling the ``prior`` method will create a PyMC random variable that represents
the latent function :math:`f(x) = \mathbf{f}`::

f = gp.prior("f", X)
@@ -217,7 +217,7 @@ thesis <https://www.cs.toronto.edu/~duvenaud/thesis.pdf>`_.

The GP objects in PyMC keep track of these marginals automatically. The
following code sketch shows how to define the conditional distribution of
-:math:`f_2^*`. We use `gp.Marginal` in the example, but the same works for
+:math:`f_2^*`. We use ``gp.Marginal`` in the example, but the same works for
other implementations. The first block fits the GP prior. We denote
:math:`f_1 + f_2` as just :math:`f` for brevity::

@@ -254,7 +254,7 @@ arguments are required for conditionals of :math:`f1` and :math:`f2`, but not

.. note::
When constructing conditionals, the additional arguments :code:`X`, :code:`y`,
-   :code:`sigma` and :code:`gp` must be provided as a dict called `given`!
+   :code:`sigma` and :code:`gp` must be provided as a dict called ``given``!

Since the marginal likelihood method of :code:`gp1` or :code:`gp2` wasn't called,
their conditionals need to be provided with the required inputs. In the same
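As a hedged sketch of the ``given`` mechanism described in the note above (data values and hyperparameters are made up for illustration):

    import numpy as np
    import pymc as pm

    X = np.linspace(0, 10, 50)[:, None]
    y = np.sin(X).ravel()
    Xnew = np.linspace(0, 12, 20)[:, None]

    with pm.Model():
        gp1 = pm.gp.Marginal(cov_func=pm.gp.cov.ExpQuad(1, ls=1.0))
        gp2 = pm.gp.Marginal(cov_func=pm.gp.cov.Matern52(1, ls=3.0))
        gp = gp1 + gp2

        y_ = gp.marginal_likelihood("y", X=X, y=y, sigma=0.1)

        # gp1's own marginal_likelihood was never called, so its conditional
        # must receive the inputs explicitly through the `given` dict:
        f1_star = gp1.conditional(
            "f1_star", Xnew, given={"X": X, "y": y, "sigma": 0.1, "gp": gp}
        )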
4 changes: 2 additions & 2 deletions pymc/backends/__init__.py
@@ -85,8 +85,8 @@
RunType: TypeAlias = Run
HAS_MCB = True
except ImportError:
-    TraceOrBackend = BaseTrace  # type: ignore
-    RunType = type(None)  # type: ignore
+    TraceOrBackend = BaseTrace  # type: ignore[misc]
+    RunType = type(None)  # type: ignore[assignment, misc]



__all__ = ["to_inference_data", "predictions_to_inference_data"]
Expand Down
11 changes: 5 additions & 6 deletions pymc/distributions/shape_utils.py
@@ -260,13 +260,13 @@ def change_dist_size(

"""
# Check the dimensionality of the `new_size` kwarg
-    new_size_ndim = np.ndim(new_size)  # type: ignore
+    new_size_ndim = np.ndim(new_size)  # type: ignore[arg-type]
if new_size_ndim > 1:
raise ShapeError("The `new_size` must be ≤1-dimensional.", actual=new_size_ndim)
elif new_size_ndim == 0:
-        new_size = (new_size,)  # type: ignore
+        new_size = (new_size,)  # type: ignore[assignment]
else:
-        new_size = tuple(new_size)  # type: ignore
+        new_size = tuple(new_size)  # type: ignore[arg-type]

op = dist.owner.op
new_dist = _change_dist_size(op, dist, new_size=new_size, expand=expand)
@@ -331,7 +331,7 @@ def change_specify_shape_size(op, ss, new_size, expand) -> TensorVariable:
new_shapes[-ndim_supp:] = shapes[-ndim_supp:]

# specify_shape has a wrong signature https://github.com/aesara-devs/aesara/issues/1164
-    return pt.specify_shape(new_var, new_shapes)  # type: ignore
+    return pt.specify_shape(new_var, new_shapes)  # type: ignore[arg-type]


def get_support_shape(
@@ -395,8 +395,7 @@ def get_support_shape(
raise ValueError(f"Number of dims is too small for ndim_supp of {ndim_supp}")
model = modelcontext(None)
inferred_support_shape = [
-        model.dim_lengths[dims[i]] - support_shape_offset[i]  # type: ignore
-        for i in np.arange(-ndim_supp, 0)
+        model.dim_lengths[dims[i]] - support_shape_offset[i] for i in np.arange(-ndim_supp, 0)
]

if inferred_support_shape is None and observed is not None:
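For orientation, ``change_dist_size`` (whose ``new_size`` normalization is patched above) is public API; a hedged usage sketch:

    import pymc as pm
    from pymc.distributions.shape_utils import change_dist_size

    d = pm.Normal.dist(mu=0.0, sigma=1.0, shape=(3,))
    # expand=True prepends the new size to the existing shape:
    d2 = change_dist_size(d, new_size=(10,), expand=True)
    print(pm.draw(d2).shape)  # (10, 3)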
4 changes: 2 additions & 2 deletions pymc/distributions/simulator.py
@@ -144,7 +144,7 @@ def __new__(cls, name, *args, **kwargs):
return super().__new__(cls, name, *args, **kwargs)

@classmethod
-    def dist(  # type: ignore
+    def dist(  # type: ignore[override]
cls,
fn,
*unnamed_params,
@@ -256,7 +256,7 @@ def rv_op(
return sim_op(*params, **kwargs)


-@_support_point.register(SimulatorRV)  # type: ignore
+@_support_point.register(SimulatorRV)
def simulator_support_point(op, rv, *inputs):
sim_inputs = op.dist_params(rv.owner)
# Take the mean of 10 draws
2 changes: 1 addition & 1 deletion pymc/distributions/transforms.py
@@ -21,7 +21,7 @@

# ignore mypy error because it somehow considers that
# "numpy.core.numeric has no attribute normalize_axis_tuple"
-from numpy.core.numeric import normalize_axis_tuple  # type: ignore
+from numpy.core.numeric import normalize_axis_tuple  # type: ignore[attr-defined]
from pytensor.graph import Op
from pytensor.tensor import TensorVariable

2 changes: 1 addition & 1 deletion pymc/gp/cov.py
@@ -147,7 +147,7 @@ def __array_wrap__(self, result):

@staticmethod
def _alloc(X, *shape: int) -> TensorVariable:
-        return pt.alloc(X, *shape)  # type: ignore
+        return pt.alloc(X, *shape)  # type: ignore[return-value]


class Covariance(BaseCovariance):
12 changes: 6 additions & 6 deletions pymc/gp/hsgp_approx.py
@@ -425,7 +425,7 @@ def prior(
gp_dims: str | None = None,
*args,
**kwargs,
-    ):  # type: ignore
+    ):
R"""
Returns the (approximate) GP prior distribution evaluated over the input locations `X`.
For usage examples, refer to `pm.gp.Latent`.
@@ -488,7 +488,7 @@ def _build_conditional(self, Xnew):
elif self._parametrization == "centered":
return self.mean_func(Xnew) + phi[:, i:] @ beta

-    def conditional(self, name: str, Xnew: TensorLike, dims: str | None = None):  # type: ignore
+    def conditional(self, name: str, Xnew: TensorLike, dims: str | None = None):  # type: ignore[override]
R"""
Returns the (approximate) conditional distribution evaluated over new input locations
`Xnew`.
@@ -683,7 +683,7 @@ def prior_linearized(self, X: TensorLike):
psd = self.scale * self.cov_func.power_spectral_density_approx(J)
return (phi_cos, phi_sin), psd

-    def prior(self, name: str, X: TensorLike, dims: str | None = None):  # type: ignore
+    def prior(self, name: str, X: TensorLike, dims: str | None = None):  # type: ignore[override]
R"""
Returns the (approximate) GP prior distribution evaluated over the input locations `X`.
For usage examples, refer to `pm.gp.Latent`.
@@ -705,8 +705,8 @@ def prior(self, name: str, X: TensorLike, dims: str | None = None): # type: ign
# and so does not contribute to the approximation.
f = (
self.mean_func(X)
-            + phi_cos @ (psd * self._beta[:m])  # type: ignore
-            + phi_sin[..., 1:] @ (psd[1:] * self._beta[m:])  # type: ignore
+            + phi_cos @ (psd * self._beta[:m])  # type: ignore[index]
+            + phi_sin[..., 1:] @ (psd[1:] * self._beta[m:])  # type: ignore[index]
)

self.f = pm.Deterministic(name, f, dims=dims)
@@ -734,7 +734,7 @@ def _build_conditional(self, Xnew):
phi = phi_cos @ (psd * beta[:m]) + phi_sin[..., 1:] @ (psd[1:] * beta[m:])
return self.mean_func(Xnew) + phi

-    def conditional(self, name: str, Xnew: TensorLike, dims: str | None = None):  # type: ignore
+    def conditional(self, name: str, Xnew: TensorLike, dims: str | None = None):  # type: ignore[override]
R"""
Returns the (approximate) conditional distribution evaluated over new input locations
`Xnew`.
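The patched ``prior``/``conditional`` signatures above belong to the HSGP approximation; a minimal usage sketch (the hyperparameters are arbitrary, chosen only for illustration):

    import numpy as np
    import pymc as pm

    X = np.linspace(0, 10, 100)[:, None]

    with pm.Model():
        ell = pm.Gamma("ell", alpha=2.0, beta=1.0)
        cov = pm.gp.cov.ExpQuad(1, ls=ell)
        # m basis functions per dimension, boundary factor c:
        gp = pm.gp.HSGP(m=[25], c=1.5, cov_func=cov)
        f = gp.prior("f", X=X)  # the `prior` method patched above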
2 changes: 1 addition & 1 deletion pymc/logprob/basic.py
@@ -588,7 +588,7 @@ def transformed_conditional_logp(
}
if values_to_transforms:
# There seems to be an incorrect type hint in TransformValuesRewrite
-        transform_rewrite = TransformValuesRewrite(values_to_transforms)  # type: ignore
+        transform_rewrite = TransformValuesRewrite(values_to_transforms)  # type: ignore[arg-type]

kwargs.setdefault("warn_rvs", False)
temp_logp_terms = conditional_logp(
5 changes: 2 additions & 3 deletions pymc/logprob/tensor.py
@@ -34,7 +34,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
-
from pathlib import Path

from pytensor import tensor as pt
@@ -165,7 +164,7 @@ def find_measurable_stacks(fgraph, node) -> list[TensorVariable] | None:
# the IR construction
replacements = [(base_var, promised_valued_rv(base_var)) for base_var in base_vars]
temp_fgraph = FunctionGraph(outputs=base_vars, clone=False)
-    toposort_replace(temp_fgraph, replacements)  # type: ignore
+    toposort_replace(temp_fgraph, replacements)  # type: ignore[arg-type]
new_base_vars = temp_fgraph.outputs

if is_join:
@@ -182,7 +181,7 @@ class MeasurableDimShuffle(MeasurableOp, DimShuffle):

# Need to get the absolute path of `c_func_file`, otherwise it tries to
# find it locally and fails when a new `Op` is initialized
-    c_func_file = str(DimShuffle.get_path(Path(DimShuffle.c_func_file)))
+    c_func_file = str(DimShuffle.get_path(Path(DimShuffle.c_func_file)))  # type: ignore[arg-type]


@_logprob.register(MeasurableDimShuffle)
10 changes: 5 additions & 5 deletions pymc/model/transform/optimization.py
@@ -77,14 +77,14 @@ def freeze_dims_and_data(
if isinstance(datum, SharedVariable)
}

-    old_outs, old_coords, old_dim_lenghts = fg.outputs, fg._coords, fg._dim_lengths  # type: ignore
+    old_outs, old_coords, old_dim_lenghts = fg.outputs, fg._coords, fg._dim_lengths  # type: ignore[attr-defined]
     # Rebuild strict will force the recreation of RV nodes with updated static types
-    new_outs = clone_replace(old_outs, replace=frozen_replacements, rebuild_strict=False)  # type: ignore
+    new_outs = clone_replace(old_outs, replace=frozen_replacements, rebuild_strict=False)  # type: ignore[arg-type]
for old_out, new_out in zip(old_outs, new_outs):
new_out.name = old_out.name
fg = FunctionGraph(outputs=new_outs, clone=False)
-    fg._coords = old_coords  # type: ignore
-    fg._dim_lengths = {  # type: ignore
+    fg._coords = old_coords  # type: ignore[attr-defined]
+    fg._dim_lengths = {  # type: ignore[attr-defined]
dim: frozen_replacements.get(dim_length, dim_length)
for dim, dim_length in old_dim_lenghts.items()
}
@@ -99,7 +99,7 @@
if transform is None:
new_value = rv.type()
else:
-            new_value = transform.forward(rv, *rv.owner.inputs).type()  # type: ignore
+            new_value = transform.forward(rv, *rv.owner.inputs).type()  # type: ignore[arg-type]
new_value.name = old_value.name
replacements[old_value] = new_value
fg.replace_all(tuple(replacements.items()), import_missing=True)
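``freeze_dims_and_data`` is public API in ``pymc.model.transform.optimization``; a hedged sketch of how the patched code is used:

    import numpy as np
    import pymc as pm
    from pymc.model.transform.optimization import freeze_dims_and_data

    with pm.Model(coords={"obs": range(3)}) as m:
        x = pm.Data("x", np.zeros(3), dims="obs")
        y = pm.Normal("y", mu=x, sigma=1.0, dims="obs")

    # Dim lengths and shared data become constants, so downstream graphs
    # can specialize on static shapes:
    frozen = freeze_dims_and_data(m)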
2 changes: 1 addition & 1 deletion pymc/model_graph.py
@@ -234,7 +234,7 @@ def _make_node(
kwargs["cluster"] = cluster

var_name: str = cast(str, node.var.name)
-    add_node(var_name.replace(":", "&"), **kwargs)  # type: ignore
+    add_node(var_name.replace(":", "&"), **kwargs)  # type: ignore[call-arg]


class ModelGraph:
6 changes: 3 additions & 3 deletions pymc/sampling/forward.py
@@ -219,7 +219,7 @@ def shared_value_matches(var):
# Walk the graph from inputs to outputs and tag the volatile variables
nodes: list[Variable] = general_toposort(
fg.outputs, deps=lambda x: x.owner.inputs if x.owner else []
-    )  # type: ignore
+    )  # type: ignore[call-overload]
volatile_nodes: set[Any] = set()
for node in nodes:
if (
@@ -446,7 +446,7 @@ def sample_prior_predictive(
)

# All model variables have a name, but mypy does not know this
-    _log.info(f"Sampling: {list(sorted(volatile_basic_rvs, key=lambda var: var.name))}")  # type: ignore
+    _log.info(f"Sampling: {list(sorted(volatile_basic_rvs, key=lambda var: var.name))}")  # type: ignore[arg-type, return-value]
values = zip(*(sampler_fn() for i in range(draws)))

data = {k: np.stack(v) for k, v in zip(names, values)}
@@ -850,7 +850,7 @@ def sample_posterior_predictive(
)
sampler_fn = point_wrapper(_sampler_fn)
# All model variables have a name, but mypy does not know this
-    _log.info(f"Sampling: {list(sorted(volatile_basic_rvs, key=lambda var: var.name))}")  # type: ignore
+    _log.info(f"Sampling: {list(sorted(volatile_basic_rvs, key=lambda var: var.name))}")  # type: ignore[arg-type, return-value]
ppc_trace_t = _DefaultTrace(samples)

progress = CustomProgress(
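The ``_log.info`` line patched in both hunks above is what prints the familiar ``Sampling: [...]`` message; a hedged end-to-end sketch (model and data are made up):

    import numpy as np
    import pymc as pm

    with pm.Model() as model:
        mu = pm.Normal("mu", 0.0, 1.0)
        y = pm.Normal("y", mu=mu, sigma=1.0, observed=np.zeros(5))
        idata = pm.sample(draws=100, tune=100, chains=2, progressbar=False)
        # Logs something like "Sampling: [y]" via the line patched above:
        pm.sample_posterior_predictive(idata, extend_inferencedata=True)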
4 changes: 2 additions & 2 deletions pymc/sampling/mcmc.py
@@ -220,15 +220,15 @@ def assign_step_methods(
has_gradient = getattr(var, "dtype") not in discrete_types
if has_gradient:
try:
-                tg.grad(model_logp, var)  # type: ignore
+                tg.grad(model_logp, var)  # type: ignore[arg-type]
except (NotImplementedError, tg.NullTypeGradError):
has_gradient = False

# select the best method
rv_var = model.values_to_rvs[var]
selected = max(
methods_list,
-            key=lambda method, var=rv_var, has_gradient=has_gradient: method._competence(  # type: ignore
+            key=lambda method, var=rv_var, has_gradient=has_gradient: method._competence(  # type: ignore[misc]
var, has_gradient
),
)
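``assign_step_methods`` picks a sampler per variable: the gradient probe above decides whether gradient-based samplers are even possible, then each method's ``_competence`` score breaks ties. A hedged sketch of the observable behavior:

    import pymc as pm

    with pm.Model():
        k = pm.Poisson("k", mu=3.0)             # discrete: no gradient, gets Metropolis
        x = pm.Normal("x", mu=0.0, sigma=1.0)   # continuous: NUTS wins the competence vote
        idata = pm.sample(draws=100, tune=100, chains=2, progressbar=False)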
4 changes: 2 additions & 2 deletions pymc/sampling/parallel.py
@@ -359,8 +359,8 @@
raise multiprocessing.TimeoutError()
process.join(timeout)
except multiprocessing.TimeoutError:
-        logger.warn(
-            "Chain processes did not terminate as expected. " "Terminating forcefully..."
+        logger.warning(
+            "Chain processes did not terminate as expected. Terminating forcefully..."
)
for process in processes:
process.terminate()