diff --git a/minimax_search_browse.py b/minimax_search_browse.py
index b2cc988..00be408 100644
--- a/minimax_search_browse.py
+++ b/minimax_search_browse.py
@@ -100,6 +100,26 @@ def parse_query(query):
     return tag_dict, real_query
 
 
+def read_tavily(url):
+    """Return the raw content Tavily extracts for ``url``, or an empty string.
+
+    The client is built without arguments, so it presumably picks up its
+    credentials from the environment (TAVILY_API_KEY) -- confirm against
+    the tavily-python documentation. Errors raised by the client propagate
+    to the caller.
+    """
+    # Function-scope import: tavily is only loaded when this provider is used.
+    from tavily import TavilyClient
+
+    tavily_client = TavilyClient()
+    response = tavily_client.extract(urls=[url])
+    results = response["results"]
+    if not results:
+        return ""
+    # raw_content can be missing or None; always hand back a string.
+    return results[0].get("raw_content") or ""
+
+
 def read_jina(url):
     headers = {
         "Authorization": f"Bearer {os.environ.get('JINA_API_KEY', '')}",
@@ -225,12 +245,22 @@ def get_browse_results(url, browse_query, max_retry=3):
     time.sleep(random.uniform(0, 16))
 
     source_text = ""
+    browse_provider = os.environ.get("BROWSE_PROVIDER", "").lower()
+    # BROWSE_PROVIDER wins when set to "tavily" or "jina"; otherwise Tavily is
+    # used only when a TAVILY_API_KEY is present in the environment.
+    use_tavily = browse_provider == "tavily" or (
+        browse_provider != "jina" and bool(os.environ.get("TAVILY_API_KEY"))
+    )
+    # Resolved once up front: the choice cannot change between retries.
+    provider_name = "tavily" if use_tavily else "jina"
+    read_page = read_tavily if use_tavily else read_jina
+
     for retry_cnt in range(max_retry):
         try:
-            source_text = read_jina(url)
+            source_text = read_page(url)
         except Exception as e:
             print(
-                f"Read jina {retry_cnt} error: {e}, url: {url}",
+                f"Read {provider_name} {retry_cnt} error: {e}, url: {url}",
                 file=sys.stderr,
                 flush=True,
             )
diff --git a/pyproject.toml b/pyproject.toml
index cc01259..89eea85 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,6 +11,7 @@ dependencies = [
     "pyyaml>=6.0",
     "openai>=1.0.0",
     "transformers>=4.30.0",
+    "tavily-python>=0.3.0",
 ]
 
 [project.scripts]