flairNLP · MaxDall · Mar 16, 2026 · Mar 16, 2026 · Mar 16, 2026
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -31,7 +31,7 @@ jobs:
       - name: Run pytest
         run: python -m pytest -vv
 
-  mypy:
+  pyright:
     # Containers must run in Linux based operating systems
     runs-on: ubuntu-latest
     steps:
@@ -53,5 +53,5 @@ jobs:
         run: |
           pip install -e .[dev]
 
-      - name: Run mypy
-        run: python -m mypy .
+      - name: Run pyright
+        run: pyright
diff --git a/pyproject.toml b/pyproject.toml
@@ -49,28 +49,22 @@ dependencies = [
 [project.optional-dependencies]
 dev = [
     "pytest~=7.2.2",
-    "mypy==1.9.0",
+    "pyright==1.1.408",
     "ruff==0.15.6",
     # type stubs
     "types-lxml",
     "types-python-dateutil>=2.8, <3",
     "types-requests>=2.28, <3",
     "types-colorama>=0.4, <1",
-    "types-dateparser>=1.2.0, <2"
+    "types-dateparser>=1.2.0, <2",
+    "types-xmltodict>=0.13.0, <1",
+    "types-tqdm>=4.66, <5"
 ]
 
-[tool.mypy]
-check_untyped_defs = true
-disallow_any_generics = true
-ignore_missing_imports = true
-no_implicit_optional = true
-show_error_codes = true
-strict_equality = true
-warn_redundant_casts = true
-warn_return_any = true
-warn_unreachable = true
-warn_unused_configs = true
-no_implicit_reexport = true
+[tool.pyright]
+pythonVersion = "3.8"
+typeCheckingMode = "standard"
+reportMissingImports = false
 
 [tool.ruff]
 line-length = 120

diff --git a/scripts/check_coverage.py b/scripts/check_coverage.py
@@ -308,7 +308,7 @@ def main() -> None:
     if (parsed := parse_coverage_file(txt)) is None:
         raise RuntimeError(f"Couldn't parse latest coverage file for run {latest_run.id}")
 
-    failed_publishers = [publisher for publisher, status in parsed.items() if not status]  # type: ignore[union-attr]
+    failed_publishers = [publisher for publisher, status in parsed.items() if not status]
 
     print(f"Latest run on '{run_time}' with {len(failed_publishers)} failed publishers.")
     print(failed_publishers)

diff --git a/scripts/generate_tables.py b/scripts/generate_tables.py
@@ -91,7 +91,7 @@ def align_tables(tables: Sequence[lxml.html.HtmlElement]) -> None:
 
     for column_index, colum_heads in enumerate(
         more_itertools.transpose(table_heads),
-        start=1,  # type: ignore[attr-defined]
+        start=1,
     ):
         column_texts: List[str] = [
             text for table in tables for text in table.xpath(f"/table/tbody/tr/td[{column_index}]//text()")

diff --git a/scripts/publisher_coverage.py b/scripts/publisher_coverage.py
@@ -47,7 +47,7 @@ def main() -> None:
                     # skip publishers providing no sources for forward crawling
                     print(f"⏩  SKIPPED: {publisher_name!r} - No sources defined")
                     continue
-                if publisher.deprecated:  # type: ignore[attr-defined]
+                if publisher.deprecated:
                     print(f"⏩  SKIPPED: {publisher_name!r} - Deprecated")
                     continue
                 if publisher.__name__ in parsed_arguments.skip:

diff --git a/src/fundus/logging.py b/src/fundus/logging.py
@@ -67,7 +67,7 @@ def add_handler(handler: logging.Handler):
         logger.addHandler(handler)
 
 
-def get_current_config() -> JSONVal:
+def get_current_config() -> Dict[str, JSONVal]:
     """Get the current logging configuration as JSON.
 
     Returns:

diff --git a/src/fundus/parser/base_parser.py b/src/fundus/parser/base_parser.py
@@ -21,6 +21,7 @@
     Union,
     get_args,
     get_origin,
+    overload,
 )
 
 import lxml.html
@@ -131,6 +132,30 @@ def wrapper(func):
     return wrapper(cls)
 
 
+@overload
+def attribute(
+    cls: Callable[..., Any],
+    /,
+    *,
+    priority: Optional[int] = ...,
+    validate: bool = ...,
+    deprecated: Optional[date] = ...,
+    default_factory: Optional[Callable[[], Any]] = ...,
+) -> Any: ...
+
+
+@overload
+def attribute(
+    cls: None = ...,
+    /,
+    *,
+    priority: Optional[int] = ...,
+    validate: bool = ...,
+    deprecated: Optional[date] = ...,
+    default_factory: Optional[Callable[[], Any]] = ...,
+) -> Callable[[Any], Any]: ...
+
+
 def attribute(
     cls=None,
     /,
@@ -139,7 +164,7 @@ def attribute(
     validate: bool = True,
     deprecated: Optional[date] = None,
     default_factory: Optional[Callable[[], Any]] = None,
-):
+) -> Any:
     return _register(
         cls,
         factory=Attribute,
@@ -150,7 +175,15 @@ def attribute(
     )
 
 
-def function(cls=None, /, *, priority: Optional[int] = None):
+@overload
+def function(cls: Callable[..., Any], /, *, priority: Optional[int] = ...) -> Any: ...
+
+
+@overload
+def function(cls: None = ..., /, *, priority: Optional[int] = ...) -> Callable[[Any], Any]: ...
+
+
+def function(cls=None, /, *, priority: Optional[int] = None) -> Any:
     return _register(cls, factory=Function, priority=priority)
 
 
@@ -375,7 +408,7 @@ def predicate(x: object) -> bool:
         mapping: Dict[date, _ParserCache] = {}
         for versioned_parser in sorted(included_parsers, key=lambda parser: parser.VALID_UNTIL):
             validation_date: date
-            if prev := mapping.get(validation_date := versioned_parser.VALID_UNTIL):  # type: ignore
+            if prev := mapping.get(validation_date := versioned_parser.VALID_UNTIL):
                 raise ValueError(
                     f"Found versions {prev.factory.__name__!r} and {versioned_parser.__name__!r} of "
                     f"{str(self)!r} with same validation date.\nMake sure you use class attribute VALID_UNTIL "

diff --git a/src/fundus/parser/data.py b/src/fundus/parser/data.py
@@ -70,7 +70,7 @@ def __init__(self, lds: Iterable[Dict[str, Any]] = ()):
                     self.add_ld(nested)
             else:
                 self.add_ld(ld)
-        self.__xml: Optional[lxml.etree.Element] = None
+        self.__xml: Optional[lxml.etree._Element] = None
 
     def __getstate__(self):
         state = self.__dict__.copy()
@@ -128,7 +128,7 @@ def get_value_by_key_path(self, key_path: List[str], default: Any = None) -> Opt
             tmp = nxt
         return tmp
 
-    def __as_xml__(self) -> lxml.etree.Element:
+    def __as_xml__(self) -> lxml.etree._Element:
         pattern = re.compile("|".join(map(re.escape, self.__xml_transformation_table__.keys())))
 
         def to_unicode_characters(text: str) -> str:
@@ -189,7 +189,7 @@ def xpath_search(self, query: Union[XPath, str], scalar: bool = False):
 
         pattern = re.compile("|".join(map(re.escape, self.__xml_transformation_table__.values())))
 
-        def node2string(n: lxml.etree.Element) -> str:
+        def node2string(n: lxml.etree._Element) -> str:
             node_value = lxml.etree.tostring(n, encoding="unicode").strip()
             if match := self.__value_regex__.match(node_value):
                 return match.group("value")
@@ -299,9 +299,9 @@ def __init__(self, texts: Iterable[str]):
     def __getitem__(self, i: int) -> str: ...
 
     @overload
-    def __getitem__(self, s: slice) -> "TextSequence": ...
+    def __getitem__(self, i: slice) -> "TextSequence": ...
 
-    def __getitem__(self, i):
+    def __getitem__(self, i: Union[int, slice]) -> Union[str, "TextSequence"]:
         return self._data[i] if isinstance(i, int) else type(self)(self._data[i])
 
     def __len__(self) -> int:
@@ -334,14 +334,21 @@ def text(self, join_on: str = "\n\n") -> str:
         return join_on.join(self.as_text_sequence())
 
     def df_traversal(self) -> Iterable[TextSequence]:
-        def recursion(o: object):
+        """Yield every TextSequence reachable from this node's values, depth first."""
+
+        def recursion(o: object) -> Iterator[TextSequence]:
             if isinstance(o, TextSequence):
                 yield o
-            elif isinstance(o, Collection):
+            # A str is a Collection of one-character strings, so descending into it
+            # would recurse without end; str/bytes are therefore never traversed.
+            elif isinstance(o, Collection) and not isinstance(o, (str, bytes)):
                 for el in o:
-                    yield from el
+                    yield from recursion(el)
             else:
-                yield o
+                # Only TextSequence objects are yielded; everything else is dropped.
+                # NOTE(review): iterable nodes that are not a Collection (no
+                # __len__/__contains__) also end up here -- confirm that is intended.
+                return
 
         for value in self:
             yield from recursion(value)

diff --git a/src/fundus/parser/utility.py b/src/fundus/parser/utility.py
@@ -28,6 +28,7 @@
 )
 from urllib.parse import urljoin
 
+import lxml.etree
 import lxml.html
 import more_itertools
 import validators
@@ -578,7 +579,7 @@ class CustomParserInfo(parser.parserinfo):
         ("Oct", "October", "Oktober", "Okt"),
         ("Nov", "November"),
         ("Dec", "December", "Dezember", "Dez"),
-    ]  # type: ignore[assignment]
+    ]
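-    # type ignore due to types-python-dateutil==2.9.0.20251008, see https://github.com/flairNLP/fundus/issues/806
+    # formerly type-ignored for types-python-dateutil==2.9.0.20251008, see https://github.com/flairNLP/fundus/issues/806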
 
 

diff --git a/src/fundus/publishers/base_objects.py b/src/fundus/publishers/base_objects.py
@@ -1,6 +1,6 @@
 from collections import defaultdict
 from textwrap import indent
-from typing import Dict, Iterable, Iterator, List, Optional, Set, Type, Union
+from typing import Dict, Iterable, Iterator, List, Optional, Sequence, Set, Type, Union
 from urllib.robotparser import RobotFileParser
 from warnings import warn
 
@@ -127,7 +127,7 @@ def __init__(
         name: str,
         domain: str,
         parser: Type[ParserProxy],
-        sources: List[URLSource],
+        sources: Sequence[URLSource],
         query_parameter: Optional[Dict[str, str]] = None,
         url_filter: Optional[URLFilter] = None,
         request_header: Optional[Dict[str, str]] = _default_header,

diff --git a/src/fundus/publishers/de/winfuture.py b/src/fundus/publishers/de/winfuture.py
@@ -41,7 +41,7 @@ def body(self) -> Optional[ArticleBody]:
             html_as_string = re.sub(r"(?<=<br>)\n(?!([<\W]))", "\n<p>", html_as_string)
             html_as_string = re.sub(r"(?<=(ipt|div)>)\n(?![\W<])", "\n<p>", html_as_string)
             html_as_string = re.sub(r"(?<![\W>])\n(?=<[a-z0-9=_'\"]*>)", "</p>\n", html_as_string)
-            doc: HtmlElement = fromstring(html_as_string)  # type: ignore
+            doc: HtmlElement = fromstring(html_as_string)
             return extract_article_body_with_selector(
                 doc=doc,
                 paragraph_selector=self._paragraph_selector,

diff --git a/src/fundus/publishers/fr/le_monde.py b/src/fundus/publishers/fr/le_monde.py
@@ -35,7 +35,7 @@ def title(self) -> Optional[str]:
 
         @attribute
         def topics(self) -> List[str]:
-            return self.precomputed.ld.bf_search("keywords")  # type: ignore
+            return self.precomputed.ld.bf_search("keywords")
 
         @attribute
         def publishing_date(self) -> Optional[datetime.datetime]:

diff --git a/src/fundus/publishers/ind/times_of_india.py b/src/fundus/publishers/ind/times_of_india.py
@@ -41,7 +41,7 @@ def body(self) -> Optional[ArticleBody]:
                 r"<div class=\"_s30J clearfix  \">", "<div class=\"_s30J clearfix  \"><p class='intro'>", html_as_string
             )
             return extract_article_body_with_selector(
-                fromstring(html_as_string),  # type: ignore
+                fromstring(html_as_string),
                 summary_selector=self._summary_selector,
                 paragraph_selector=self._paragraph_selector,
                 subheadline_selector=self._subheadline_selector,

diff --git a/src/fundus/scraping/article.py b/src/fundus/scraping/article.py
@@ -130,12 +130,12 @@ def to_json(self, *attributes: str) -> Dict[str, JSONVal]:
 
         def serialize(v: Any) -> JSONVal:
             if hasattr(v, "serialize"):
-                return v.serialize()  # type: ignore[no-any-return]
+                return v.serialize()
             elif isinstance(v, datetime):
                 return str(v)
             elif not is_jsonable(v):
                 raise TypeError(f"Attribute {attribute!r} of type {type(v)!r} is not JSON serializable")
-            return v  # type: ignore[no-any-return]
+            return v
 
         serialization: Dict[str, JSONVal] = {}
         for attribute in attributes: