From 4bb4380457b9cfa852edabde8d42d5bf26a1649e Mon Sep 17 00:00:00 2001 From: Taddeus Buica Date: Wed, 15 Jan 2025 03:41:32 +0200 Subject: [PATCH 01/10] fix bugs with context --- inference.py | 5 ++++- utils.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/inference.py b/inference.py index d87ad9e..cffc4f6 100755 --- a/inference.py +++ b/inference.py @@ -2,6 +2,7 @@ from openai import OpenAI import openai import os, anthropic, json +from utils import clip_tokens TOKENS_IN = dict() TOKENS_OUT = dict() @@ -29,7 +30,7 @@ def curr_cost_est(): } return sum([costmap_in[_]*TOKENS_IN[_] for _ in TOKENS_IN]) + sum([costmap_out[_]*TOKENS_OUT[_] for _ in TOKENS_OUT]) -def query_model(model_str, prompt, system_prompt, openai_api_key=None, anthropic_api_key=None, tries=5, timeout=5.0, temp=None, print_cost=True, version="1.5"): +def query_model(model_str, prompt, system_prompt, openai_api_key=None, anthropic_api_key=None, tries=5, timeout=5.0, temp=None, print_cost=True, version="1.5", max_context_tokens=128000): preloaded_api = os.getenv('OPENAI_API_KEY') if openai_api_key is None and preloaded_api is not None: openai_api_key = preloaded_api @@ -47,6 +48,8 @@ def query_model(model_str, prompt, system_prompt, openai_api_key=None, anthropic messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}] + + messages = clip_tokens(messages, model=model_str, max_tokens=max_context_tokens) if version == "0.28": if temp is None: completion = openai.ChatCompletion.create( diff --git a/utils.py b/utils.py index a163273..47c4faa 100755 --- a/utils.py +++ b/utils.py @@ -74,7 +74,7 @@ def save_to_file(location, filename, data): print(f"Error saving file {filename}: {e}") -def clip_tokens(messages, model="gpt-4", max_tokens=100000): +def clip_tokens(messages, model="o1-mini", max_tokens=128000): enc = tiktoken.encoding_for_model(model) total_tokens = sum([len(enc.encode(message["content"])) for message in messages]) From e0990b62bf1b3574cdbdd7695588ed5793ef3daa Mon Sep 17 00:00:00 2001 From: Taddeus Buica Date: Wed, 15 Jan 2025 09:37:38 +0200 Subject: [PATCH 02/10] Add support for file --- ai_lab_repo.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ai_lab_repo.py b/ai_lab_repo.py index dbe9541..7c05161 100755 --- a/ai_lab_repo.py +++ b/ai_lab_repo.py @@ -611,6 +611,7 @@ def parse_arguments(): help='Total number of paper-solver steps' ) + parser.add_argument('--file-path', type=str, default=None) return parser.parse_args() @@ -622,6 +623,8 @@ def parse_arguments(): human_mode = args.copilot_mode.lower() == "true" compile_pdf = args.compile_latex.lower() == "true" load_existing = args.load_existing.lower() == "true" + file_path = args.file_path + try: num_papers_lit_review = int(args.num_papers_lit_review.lower()) except Exception: @@ -654,6 +657,13 @@ def parse_arguments(): else: research_topic = args.research_topic + if file_path and "{FILE}" in research_topic: + with open(file_path, 'r', encoding='utf-8') as f: + file_content = f.read() + # Replace the placeholder with the entire file text + research_topic = research_topic.replace("{FILE}", file_content) + + task_notes_LLM = [ {"phases": ["plan formulation"], "note": f"You should come up with a plan for TWO experiments."}, From 6b78684a76105b47deb81bf633e0f481a23bc9af Mon Sep 17 00:00:00 2001 From: Taddeus Buica Date: Wed, 15 Jan 2025 18:23:55 +0200 Subject: [PATCH 03/10] more fixes --- ai_lab_repo.py | 48 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/ai_lab_repo.py b/ai_lab_repo.py index 7c05161..cebb8ec 100755 --- a/ai_lab_repo.py +++ b/ai_lab_repo.py @@ -446,10 +446,13 @@ def literature_review(self): @return: (bool) whether to repeat the phase """ arx_eng = ArxivSearch() - max_tries = self.max_steps * 5 # lit review often requires extra steps + max_tries = self.max_steps * 5 # lit review often requires extra steps + # get initial response from PhD agent resp = self.phd.inference(self.research_topic, "literature review", step=0, temp=0.8) - if self.verbose: print(resp, "\n~~~~~~~~~~~") + if self.verbose: + print(resp, "\n~~~~~~~~~~~") + # iterate until max num tries to complete task is exhausted for _i in range(max_tries): feedback = str() @@ -463,14 +466,30 @@ def literature_review(self): # grab full text from arxiv ID elif "```FULL_TEXT" in resp: query = extract_prompt(resp, "FULL_TEXT") - # expiration timer so that paper does not remain in context too long - arxiv_paper = f"```EXPIRATION {self.arxiv_paper_exp_time}\n" + arx_eng.retrieve_full_paper_text(query) + "```" + try: + # expiration timer so that paper does not remain in context too long + full_text_content = arx_eng.retrieve_full_paper_text(query) + except Exception as e: + # Catch any unexpected errors from arxiv.Client() + err_msg = f"[ERROR] Could not retrieve paper. Possibly invalid arXiv ID. Error: {e}" + full_text_content = err_msg + + # In case retrieve_full_paper_text returns an error string + # or if we want to unify it, e.g. "[ERROR] ...something" + if full_text_content.startswith("[ERROR]"): + # We won't crash; just pass that as feedback + arxiv_paper = f"```EXPIRATION {self.arxiv_paper_exp_time}\n{full_text_content}```" + else: + # normal successful retrieval + arxiv_paper = f"```EXPIRATION {self.arxiv_paper_exp_time}\n{full_text_content}```" + feedback = arxiv_paper # if add paper, extract and add to lit review, provide feedback elif "```ADD_PAPER" in resp: query = extract_prompt(resp, "ADD_PAPER") feedback, text = self.phd.add_review(query, arx_eng) + # If we want to store reference text for later usage if len(self.reference_papers) < self.num_ref_papers: self.reference_papers.append(text) @@ -478,6 +497,7 @@ def literature_review(self): if len(self.phd.lit_review) >= self.num_papers_lit_review: # generate formal review lit_review_sum = self.phd.format_review() + # if human in loop -> check if human is happy with the produced review if self.human_in_loop_flag["literature review"]: retry = self.human_in_loop("literature review", lit_review_sum) @@ -485,18 +505,32 @@ def literature_review(self): if retry: self.phd.lit_review = [] return retry + # otherwise, return lit review and move on to next stage - if self.verbose: print(self.phd.lit_review_sum) + if self.verbose: + print(self.phd.lit_review_sum) # set agent self.set_agent_attr("lit_review_sum", lit_review_sum) # reset agent state self.reset_agents() self.statistics_per_phase["literature review"]["steps"] = _i return False - resp = self.phd.inference(self.research_topic, "literature review", feedback=feedback, step=_i + 1, temp=0.8) - if self.verbose: print(resp, "\n~~~~~~~~~~~") + + # Move on to the next iteration with new feedback + resp = self.phd.inference( + self.research_topic, + "literature review", + feedback=feedback, + step=_i + 1, + temp=0.8 + ) + if self.verbose: + print(resp, "\n~~~~~~~~~~~") + + # If we exceed max_tries: raise Exception("Max tries during phase: Literature Review") + def human_in_loop(self, phase, phase_prod): """ Get human feedback for phase output From 953de30c61e9b5617096591b1b2578e75066f491 Mon Sep 17 00:00:00 2001 From: Taddeus Buica Date: Wed, 15 Jan 2025 18:31:02 +0200 Subject: [PATCH 04/10] fix code execution timeout --- .gitignore | 4 +++- tools.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index ea6f4be..64d75d7 100644 --- a/.gitignore +++ b/.gitignore @@ -13,4 +13,6 @@ research_dir/* state_saves/* __pycache__/* Figure*.png -testrun.py \ No newline at end of file +testrun.py +data/* +projects/* diff --git a/tools.py b/tools.py index 5d0d4a9..69a5a00 100755 --- a/tools.py +++ b/tools.py @@ -349,7 +349,7 @@ def run_code(queue): import traceback -def execute_code(code_str, timeout=60, MAX_LEN=1000): +def execute_code(code_str, timeout=3600, MAX_LEN=1000): #print(code_str) # prevent plotting errors From 9c98667c16b77bcdf823701b0efe756659c68042 Mon Sep 17 00:00:00 2001 From: Taddeus Buica Date: Thu, 16 Jan 2025 11:17:37 +0200 Subject: [PATCH 05/10] update code exec to 10 mins timeout --- tools.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools.py b/tools.py index 69a5a00..abcbc13 100755 --- a/tools.py +++ b/tools.py @@ -292,7 +292,7 @@ def retrieve_full_paper_text(self, query): import io import traceback -def execute_code(code_str, timeout=180): +def execute_code(code_str, timeout=600): if "load_dataset('pubmed" in code_str: return "pubmed Download took way too long. Program terminated" @@ -349,7 +349,7 @@ def run_code(queue): import traceback -def execute_code(code_str, timeout=3600, MAX_LEN=1000): +def execute_code(code_str, timeout=600, MAX_LEN=1000): #print(code_str) # prevent plotting errors From 24a0835a2695a38394776ede38dd9641ee9c676a Mon Sep 17 00:00:00 2001 From: Taddeus Buica Date: Thu, 16 Jan 2025 13:13:28 +0200 Subject: [PATCH 06/10] Implemented better code execution based on usage --- ai_lab_repo.py | 4 +- tools.py | 332 +++++++++++++++++++++++++++++-------------------- 2 files changed, 196 insertions(+), 140 deletions(-) diff --git a/ai_lab_repo.py b/ai_lab_repo.py index cebb8ec..5f1e5e0 100755 --- a/ai_lab_repo.py +++ b/ai_lab_repo.py @@ -355,7 +355,7 @@ def data_preparation(self): if self.verbose: print("#"*40, f"\nThe following is dialogue produced by the SW Engineer: {dialogue}", "\n", "#"*40) if "```SUBMIT_CODE" in resp: final_code = extract_prompt(resp, "SUBMIT_CODE") - code_resp = execute_code(final_code, timeout=60) + code_resp = execute_code(final_code) if self.verbose: print("!"*100, "\n", f"CODE RESPONSE: {code_resp}") swe_feedback += f"\nCode Response: {code_resp}\n" if "[CODE EXECUTION ERROR]" in code_resp: @@ -389,7 +389,7 @@ def data_preparation(self): if "```python" in resp: code = extract_prompt(resp, "python") code = self.ml_engineer.dataset_code + "\n" + code - code_resp = execute_code(code, timeout=120) + code_resp = execute_code(code) ml_command = f"Code produced by the ML agent:\n{code}" ml_feedback += f"\nCode Response: {code_resp}\n" if self.verbose: print("!"*100, "\n", f"CODE RESPONSE: {code_resp}") diff --git a/tools.py b/tools.py index abcbc13..332c991 100755 --- a/tools.py +++ b/tools.py @@ -1,3 +1,7 @@ +##################### +# tools.py +##################### + from utils import * import time @@ -16,14 +20,19 @@ import traceback import concurrent.futures +import psutil +import subprocess +############################################################################### +# HFDataSearch Class # +############################################################################### class HFDataSearch: def __init__(self, like_thr=3, dwn_thr=50) -> None: """ Class for finding relevant huggingface datasets - :param like_thr: - :param dwn_thr: + :param like_thr: threshold of 'likes' + :param dwn_thr: threshold of 'downloads' """ self.dwn_thr = dwn_thr self.like_thr = like_thr @@ -103,21 +112,24 @@ def retrieve_ds(self, query, N=10, sim_w=1.0, like_w=0.0, dwn_w=0.0): cosine_similarities = linear_kernel(query_vector, self.description_vectors).flatten() # Normalize cosine similarities cosine_similarities_norm = self._normalize(cosine_similarities) + # Compute final scores final_scores = ( - sim_w * cosine_similarities_norm + - like_w * self.likes_norm + - dwn_w * self.downloads_norm + sim_w * cosine_similarities_norm + + like_w * self.likes_norm + + dwn_w * self.downloads_norm ) + # Get top N indices top_indices = final_scores.argsort()[-N:][::-1] # Convert indices to Python ints top_indices = [int(i) for i in top_indices] top_datasets = [self.ds[i] for i in top_indices] - # check if dataset has a test & train set - has_test_set = list() - has_train_set = list() - ds_size_info = list() + + # Check if dataset has a test & train set; gather size info + has_test_set = [] + has_train_set = [] + ds_size_info = [] for i in top_indices: try: dbuilder = load_dataset_builder(self.ds[i]["id"], trust_remote_code=True).info @@ -132,10 +144,11 @@ def retrieve_ds(self, query, N=10, sim_w=1.0, like_w=0.0, dwn_w=0.0): has_train_set.append(False) ds_size_info.append((None, None, None, None)) continue - # Print number of examples for + has_test, has_train = "test" in dbuilder.splits, "train" in dbuilder.splits has_test_set.append(has_test) has_train_set.append(has_train) + test_dwn_size, test_elem_size = None, None train_dwn_size, train_elem_size = None, None if has_test: @@ -144,7 +157,10 @@ def retrieve_ds(self, query, N=10, sim_w=1.0, like_w=0.0, dwn_w=0.0): if has_train: train_dwn_size = bytes2human(dbuilder.splits["train"].num_bytes) train_elem_size = dbuilder.splits["train"].num_examples + ds_size_info.append((test_dwn_size, test_elem_size, train_dwn_size, train_elem_size)) + + # Attach metadata to the top_datasets for _i in range(len(top_datasets)): top_datasets[_i]["has_test_set"] = has_test_set[_i] top_datasets[_i]["has_train_set"] = has_train_set[_i] @@ -152,6 +168,7 @@ def retrieve_ds(self, query, N=10, sim_w=1.0, like_w=0.0, dwn_w=0.0): top_datasets[_i]["test_element_size"] = ds_size_info[_i][1] top_datasets[_i]["train_download_size"] = ds_size_info[_i][2] top_datasets[_i]["train_element_size"] = ds_size_info[_i][3] + return top_datasets def results_str(self, results): @@ -160,7 +177,7 @@ def results_str(self, results): :param results: (list(dict)) list of results from search :return: (list(str)) list of results in human-readable format """ - result_strs = list() + result_strs = [] for result in results: res_str = f"Dataset ID: {result['id']}\n" res_str += f"Description: {result['description']}\n" @@ -175,27 +192,51 @@ def results_str(self, results): result_strs.append(res_str) return result_strs +############################################################################### +# SemanticScholarSearch Class # +############################################################################### class SemanticScholarSearch: def __init__(self): self.sch_engine = SemanticScholar(retry=False) def find_papers_by_str(self, query, N=10): - paper_sums = list() - results = self.sch_engine.search_paper(query, limit=N, min_citation_count=3, open_access_pdf=True) + """ + Finds top-N papers from semantic scholar + :param query: str + :param N: number of results + :return: list of string summaries + """ + paper_sums = [] + results = self.sch_engine.search_paper( + query, + limit=N, + min_citation_count=3, + open_access_pdf=True + ) for _i in range(len(results)): - paper_sum = f'Title: {results[_i].title}\n' - paper_sum += f'Abstract: {results[_i].abstract}\n' - paper_sum += f'Citations: {results[_i].citationCount}\n' - paper_sum += f'Release Date: year {results[_i].publicationDate.year}, month {results[_i].publicationDate.month}, day {results[_i].publicationDate.day}\n' - paper_sum += f'Venue: {results[_i].venue}\n' - paper_sum += f'Paper ID: {results[_i].externalIds["DOI"]}\n' + paper_sum = f"Title: {results[_i].title}\n" + paper_sum += f"Abstract: {results[_i].abstract}\n" + paper_sum += f"Citations: {results[_i].citationCount}\n" + paper_sum += ( + f"Release Date: year {results[_i].publicationDate.year}, " + f"month {results[_i].publicationDate.month}, " + f"day {results[_i].publicationDate.day}\n" + ) + paper_sum += f"Venue: {results[_i].venue}\n" + paper_sum += f"Paper ID: {results[_i].externalIds['DOI']}\n" paper_sums.append(paper_sum) return paper_sums def retrieve_full_paper_text(self, query): + """ + NOTE: Not implemented in this example + """ pass +############################################################################### +# ArxivSearch Class # +############################################################################### class ArxivSearch: def __init__(self): @@ -203,43 +244,45 @@ def __init__(self): self.sch_engine = arxiv.Client() def _process_query(self, query: str) -> str: - """Process query string to fit within MAX_QUERY_LENGTH while preserving as much information as possible""" + """ + Process query string to fit within MAX_QUERY_LENGTH + while preserving as much info as possible + """ MAX_QUERY_LENGTH = 300 - if len(query) <= MAX_QUERY_LENGTH: return query - + # Split into words words = query.split() processed_query = [] current_length = 0 - + # Add words while staying under the limit - # Account for spaces between words for word in words: - # +1 for the space that will be added between words if current_length + len(word) + 1 <= MAX_QUERY_LENGTH: processed_query.append(word) current_length += len(word) + 1 else: break - return ' '.join(processed_query) def find_papers_by_str(self, query, N=20): + """ + Finds top-N relevant arXiv papers + """ processed_query = self._process_query(query) max_retries = 3 retry_count = 0 - + while retry_count < max_retries: try: search = arxiv.Search( query="abs:" + processed_query, max_results=N, - sort_by=arxiv.SortCriterion.Relevance) + sort_by=arxiv.SortCriterion.Relevance + ) - paper_sums = list() - # `results` is a generator; you can iterate over its elements one by one... + paper_sums = [] for r in self.sch_engine.results(search): paperid = r.pdf_url.split("/")[-1] pubdate = str(r.published).split(" ")[0] @@ -251,148 +294,161 @@ def find_papers_by_str(self, query, N=20): paper_sums.append(paper_sum) time.sleep(2.0) return "\n".join(paper_sums) - + except Exception as e: retry_count += 1 if retry_count < max_retries: - # 递增延时 + # Exponential-ish back-off time.sleep(2 * retry_count) continue - + + # If unsuccessful return None def retrieve_full_paper_text(self, query): - pdf_text = str() + """ + Download and extract full text from arXiv PDF + """ + pdf_text = "" + # Attempt to get single result with the provided paper ID paper = next(arxiv.Client().results(arxiv.Search(id_list=[query]))) - # Download the PDF to the PWD with a custom filename. + + # Download the PDF to a local file paper.download_pdf(filename="downloaded-paper.pdf") - # creating a pdf reader object - reader = PdfReader('downloaded-paper.pdf') - # Iterate over all the pages + + # Create a pdf reader object + reader = PdfReader("downloaded-paper.pdf") + + # Iterate over pages for page_number, page in enumerate(reader.pages, start=1): - # Extract text from the page try: text = page.extract_text() - except Exception as e: + except Exception: os.remove("downloaded-paper.pdf") time.sleep(2.0) return "EXTRACTION FAILED" - # Do something with the text (e.g., print it) pdf_text += f"--- Page {page_number} ---" pdf_text += text pdf_text += "\n" + os.remove("downloaded-paper.pdf") time.sleep(2.0) return pdf_text -""" -import multiprocessing -import sys -import io -import traceback - -def execute_code(code_str, timeout=600): - if "load_dataset('pubmed" in code_str: - return "pubmed Download took way too long. Program terminated" - - def run_code(queue): - # Redirect stdout to capture print outputs - output_capture = io.StringIO() - sys.stdout = output_capture - try: - exec_globals = {} - exec(code_str, exec_globals) - except Exception as e: - output_capture.write(f"[CODE EXECUTION ERROR]: {str(e)}\n") - traceback.print_exc(file=output_capture) - finally: - # Put the output in the queue - queue.put(output_capture.getvalue()) - # Restore stdout - sys.stdout = sys.__stdout__ - - # Create a multiprocessing Queue to capture the output - queue = multiprocessing.Queue() - # Create a new Process - process = multiprocessing.Process(target=run_code, args=(queue,)) - process.start() - # Wait for the process to finish or timeout - process.join(timeout) - - if process.is_alive(): - process.terminate() - process.join() - return f"[CODE EXECUTION ERROR]: Code execution exceeded the timeout limit of {timeout} seconds. You must reduce the time complexity of your code." - else: - # Retrieve the output from the queue - output = queue.get() - return output - -""" - -import io -import sys -import traceback -import concurrent.futures - - - -import multiprocessing -import io -import sys -import traceback -import multiprocessing -import io -import sys -import traceback - - -def execute_code(code_str, timeout=600, MAX_LEN=1000): - #print(code_str) - - # prevent plotting errors +############################################################################### +# execute_code Function # +############################################################################### +# +# This new approach lets truly busy code keep running if it's using CPU, +# but kills it if idle for too long, or if it hits a hard overall timeout. +# +############################################################################### + +def execute_code( + code_str, + max_total_time=1800, # Hard limit on total run time (seconds) + max_idle_time=60, # If no CPU usage / no prints for this many secs, kill + max_stdout_len=2000 # max length of captured logs +): + """ + Execute code in a separate subprocess with: + 1. absolute time limit (max_total_time) + 2. idle time limit (max_idle_time) based on CPU usage + 3. capture stdout (limited by max_stdout_len) + """ import matplotlib - matplotlib.use('Agg') # Use the non-interactive Agg backend + matplotlib.use('Agg') # Use a non-interactive backend import matplotlib.pyplot as plt - # Preventing execution of certain resource-intensive datasets + # Basic checks if "load_dataset('pubmed" in code_str: return "[CODE EXECUTION ERROR] pubmed Download took way too long. Program terminated" if "exit(" in code_str: - return "[CODE EXECUTION ERROR] The exit() command is not allowed you must remove this." - #print(code_str) - # Capturing the output + return "[CODE EXECUTION ERROR] The exit() command is not allowed; please remove it." + + # Write the user's code to a temporary script: + temp_filename = "temp_script.py" + with open(temp_filename, "w", encoding="utf-8") as f: + f.write(code_str) + + # Start a subprocess to run that script + start_time = time.time() output_capture = io.StringIO() - sys.stdout = output_capture - # Create a new global context for exec - exec_globals = globals() + # Launch the subprocess in text mode, capturing stdout + process = subprocess.Popen( + [sys.executable, temp_filename], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + universal_newlines=True + ) - def run_code(): - try: - # Executing the code in the global namespace - exec(code_str, exec_globals) - except Exception as e: - output_capture.write(f"[CODE EXECUTION ERROR]: {str(e)}\n") - traceback.print_exc(file=output_capture) - - try: - # Running code in a separate thread with a timeout - with concurrent.futures.ThreadPoolExecutor() as executor: - future = executor.submit(run_code) - future.result(timeout=timeout) - except concurrent.futures.TimeoutError: - return f"[CODE EXECUTION ERROR]: Code execution exceeded the timeout limit of {timeout} seconds. You must reduce the time complexity of your code." - except Exception as e: - return f"[CODE EXECUTION ERROR]: {str(e)}" - finally: - # Restoring standard output - sys.stdout = sys.__stdout__ - - # Returning the captured output - return output_capture.getvalue()[:MAX_LEN] + # We'll track CPU usage with psutil + proc_psutil = psutil.Process(process.pid) + + last_output_time = time.time() + last_cpu_check_time = time.time() + kill_reason = None + while True: + # If the process ended, break + if process.poll() is not None: + break + # Grab the next line if available + line = None + try: + # If there's no line, readline() can block or return '' + line = process.stdout.readline() + except Exception: + pass + + # If we read some line + if line: + output_capture.write(line) + last_output_time = time.time() + + # Check CPU usage every 2 seconds + if (time.time() - last_cpu_check_time) > 2.0: + last_cpu_check_time = time.time() + cpu_usage = proc_psutil.cpu_percent(interval=0.1) + # If usage > ~1%, that means it's not idle + if cpu_usage > 1.0: + last_output_time = time.time() + + # If we've been idle for too long + if (time.time() - last_output_time) > max_idle_time: + kill_reason = f"Idle for {max_idle_time} seconds." + process.kill() + break + + # If we've exceeded the total time + if (time.time() - start_time) > max_total_time: + kill_reason = f"Exceeded total runtime of {max_total_time} seconds." + process.kill() + break + + time.sleep(0.05) # Small pause to reduce busy-wait CPU usage + + # Drain any leftover output + if process.poll() is not None: + leftover = process.stdout.read() + if leftover: + output_capture.write(leftover) + + process.stdout.close() + process.wait() + + # Remove temp file if desired + if os.path.exists(temp_filename): + os.remove(temp_filename) + + output = output_capture.getvalue() + if kill_reason: + output += f"\n[CODE EXECUTION STOPPED]: {kill_reason}\n" + + # Truncate output if needed + return output[:max_stdout_len] From 0eb467c20a77612b8a4ba6e689ddcb06911ece81 Mon Sep 17 00:00:00 2001 From: Taddeus Buica Date: Thu, 16 Jan 2025 13:23:58 +0200 Subject: [PATCH 07/10] updated dependencies --- requirements.txt | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index e08992b..eed95c4 100755 --- a/requirements.txt +++ b/requirements.txt @@ -10,32 +10,41 @@ arxiv==2.1.3 astunparse==1.6.3 async-timeout==5.0.1 attrs==24.2.0 +beautifulsoup4==4.12.3 blis==1.0.1 catalogue==2.0.10 certifi==2024.8.30 charset-normalizer==3.4.0 click==8.1.7 cloudpathlib==0.20.0 +cloudpickle==3.1.1 confection==0.1.5 contourpy==1.3.0 cycler==0.12.1 cymem==2.0.10 datasets==3.1.0 diffusers==0.31.0 -dill==0.3.8 +dill==0.3.9 distro==1.9.0 +EMD-signal @ git+https://github.com/laszukdawid/PyEMD.git@4fc40017c1db8f1fceda4370a12314e1dedf8dde exceptiongroup==1.2.2 +Farama-Notifications==0.0.4 feedparser==6.0.11 filelock==3.16.1 flatbuffers==24.3.25 fonttools==4.55.0 +frozendict==2.4.6 frozenlist==1.5.0 fsspec==2024.9.0 gast==0.6.0 google-pasta==0.2.0 grpcio==1.68.0 +gym==0.26.2 +gym-notices==0.0.8 +gymnasium==1.0.0 h11==0.14.0 h5py==3.12.1 +html5lib==1.1 httpcore==1.0.7 httpx==0.27.2 huggingface-hub==0.26.2 @@ -52,6 +61,7 @@ langcodes==3.5.0 language_data==1.3.0 lazy_loader==0.4 libclang==18.1.1 +lxml==5.3.0 marisa-trie==1.2.1 Markdown==3.7 markdown-it-py==3.0.0 @@ -61,21 +71,27 @@ mdurl==0.1.2 ml-dtypes==0.4.1 mpmath==1.3.0 multidict==6.1.0 -multiprocess==0.70.16 +multiprocess==0.70.17 +multitasking==0.0.11 murmurhash==1.0.11 namex==0.0.8 nest-asyncio==1.6.0 networkx==3.2.1 nltk==3.9.1 -numpy==2.0.2 +numpy==1.26.4 openai==1.55.1 opt_einsum==3.4.0 optree==0.13.1 packaging==24.2 pandas==2.2.3 +pathos==0.3.3 patsy==1.0.1 +peewee==3.17.8 pillow==11.0.0 +platformdirs==4.3.6 plotly==5.24.1 +pox==0.3.5 +ppft==1.7.6.9 preshed==3.0.9 propcache==0.2.0 protobuf==5.28.3 @@ -83,11 +99,13 @@ psutil==6.1.0 pyarrow==18.1.0 pydantic==2.10.2 pydantic_core==2.27.1 +pyemd==1.0.0 Pygments==2.18.0 pyparsing==3.2.0 pypdf==5.1.0 python-dateutil==2.9.0.post0 pytz==2024.2 +PyWavelets==1.8.0 PyYAML==6.0.2 regex==2024.11.6 requests==2.32.3 @@ -104,10 +122,12 @@ shellingham==1.5.4 six==1.16.0 smart-open==7.0.5 sniffio==1.3.1 +soupsieve==2.6 spacy==3.8.2 spacy-legacy==3.0.12 spacy-loggers==1.0.5 srsly==2.4.8 +stable_baselines3==2.4.1 statsmodels==0.14.4 sympy==1.13.1 tenacity==9.0.0 @@ -130,8 +150,10 @@ tzdata==2024.2 urllib3==2.2.3 wasabi==1.1.3 weasel==0.4.1 +webencodings==0.5.1 Werkzeug==3.1.3 wrapt==1.17.0 xxhash==3.5.0 yarl==1.18.0 +yfinance==0.2.51 zipp==3.21.0 From df3151b34bca3bb5e7c75f7e45e380d4c19315f3 Mon Sep 17 00:00:00 2001 From: Taddeus Buica Date: Thu, 16 Jan 2025 14:58:43 +0200 Subject: [PATCH 08/10] also gracefully handles "zombie" processes by catching exceptions # or checking .is_running() / .status() --- tools.py | 66 ++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 21 deletions(-) diff --git a/tools.py b/tools.py index 332c991..b2b67e9 100755 --- a/tools.py +++ b/tools.py @@ -341,14 +341,16 @@ def retrieve_full_paper_text(self, query): # execute_code Function # ############################################################################### # -# This new approach lets truly busy code keep running if it's using CPU, -# but kills it if idle for too long, or if it hits a hard overall timeout. +# - uses psutil to keep code running if it remains busy. +# - kills code if idle for too long or hits a hard time limit. +# - also gracefully handles "zombie" processes by catching exceptions +# or checking .is_running() / .status(). # ############################################################################### def execute_code( - code_str, - max_total_time=1800, # Hard limit on total run time (seconds) + code_str, + max_total_time=7200, # Hard limit on total runtime (seconds) max_idle_time=60, # If no CPU usage / no prints for this many secs, kill max_stdout_len=2000 # max length of captured logs ): @@ -357,6 +359,7 @@ def execute_code( 1. absolute time limit (max_total_time) 2. idle time limit (max_idle_time) based on CPU usage 3. capture stdout (limited by max_stdout_len) + 4. gracefully handle zombie processes or processes that vanish """ import matplotlib matplotlib.use('Agg') # Use a non-interactive backend @@ -368,12 +371,10 @@ def execute_code( if "exit(" in code_str: return "[CODE EXECUTION ERROR] The exit() command is not allowed; please remove it." - # Write the user's code to a temporary script: temp_filename = "temp_script.py" with open(temp_filename, "w", encoding="utf-8") as f: f.write(code_str) - # Start a subprocess to run that script start_time = time.time() output_capture = io.StringIO() @@ -385,54 +386,76 @@ def execute_code( universal_newlines=True ) - # We'll track CPU usage with psutil proc_psutil = psutil.Process(process.pid) - last_output_time = time.time() last_cpu_check_time = time.time() kill_reason = None while True: - # If the process ended, break + # 1) If the process ended, break if process.poll() is not None: break - # Grab the next line if available + # 2) Read any line from stdout line = None try: - # If there's no line, readline() can block or return '' line = process.stdout.readline() except Exception: pass - # If we read some line if line: output_capture.write(line) last_output_time = time.time() - # Check CPU usage every 2 seconds + # 3) CPU usage check every 2 seconds if (time.time() - last_cpu_check_time) > 2.0: last_cpu_check_time = time.time() - cpu_usage = proc_psutil.cpu_percent(interval=0.1) - # If usage > ~1%, that means it's not idle + + # (a) Check if process is still considered "running" in psutil + if not proc_psutil.is_running(): + # If it’s not running, possibly a zombie or gone; break + kill_reason = "Process ended or is zombie" + break + + # (b) Check if status is zombie + try: + if proc_psutil.status() == psutil.ZOMBIE: + kill_reason = "Process is zombie" + process.kill() + break + except psutil.Error: + # If we fail to get status, we treat it as an error + kill_reason = "Could not retrieve process status" + process.kill() + break + + # (c) Try CPU usage + try: + cpu_usage = proc_psutil.cpu_percent(interval=0.1) + except (psutil.ZombieProcess, psutil.NoSuchProcess, psutil.AccessDenied): + kill_reason = "Process is zombie or gone (cpu_percent error)" + process.kill() + break + + # If usage > 1%, consider it active if cpu_usage > 1.0: last_output_time = time.time() - # If we've been idle for too long + # 4) Idle check if (time.time() - last_output_time) > max_idle_time: kill_reason = f"Idle for {max_idle_time} seconds." process.kill() break - # If we've exceeded the total time + # 5) Total time check if (time.time() - start_time) > max_total_time: kill_reason = f"Exceeded total runtime of {max_total_time} seconds." process.kill() break - time.sleep(0.05) # Small pause to reduce busy-wait CPU usage + time.sleep(0.05) - # Drain any leftover output + # 6) Drain leftover stdout if process.poll() is not None: leftover = process.stdout.read() if leftover: @@ -441,14 +464,15 @@ def execute_code( process.stdout.close() process.wait() - # Remove temp file if desired + # 7) Remove temp file if desired if os.path.exists(temp_filename): os.remove(temp_filename) + # 8) Attach kill_reason to logs if any output = output_capture.getvalue() if kill_reason: output += f"\n[CODE EXECUTION STOPPED]: {kill_reason}\n" - # Truncate output if needed return output[:max_stdout_len] + From 2d6f3807773362e1eca9cf27aa6eaa19a2b70142 Mon Sep 17 00:00:00 2001 From: Taddeus Buica Date: Thu, 16 Jan 2025 15:14:31 +0200 Subject: [PATCH 09/10] fix zombie issue --- tools.py | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/tools.py b/tools.py index b2b67e9..9eaa4cc 100755 --- a/tools.py +++ b/tools.py @@ -352,14 +352,14 @@ def execute_code( code_str, max_total_time=7200, # Hard limit on total runtime (seconds) max_idle_time=60, # If no CPU usage / no prints for this many secs, kill - max_stdout_len=2000 # max length of captured logs + max_stdout_len=2000 ): """ Execute code in a separate subprocess with: - 1. absolute time limit (max_total_time) - 2. idle time limit (max_idle_time) based on CPU usage - 3. capture stdout (limited by max_stdout_len) - 4. gracefully handle zombie processes or processes that vanish + 1) absolute time limit (max_total_time) + 2) idle time limit (max_idle_time) based on CPU usage + 3) capture stdout (limited by max_stdout_len) + 4) gracefully handle zombie processes or processes that vanish """ import matplotlib matplotlib.use('Agg') # Use a non-interactive backend @@ -378,7 +378,6 @@ def execute_code( start_time = time.time() output_capture = io.StringIO() - # Launch the subprocess in text mode, capturing stdout process = subprocess.Popen( [sys.executable, temp_filename], stdout=subprocess.PIPE, @@ -391,17 +390,20 @@ def execute_code( last_cpu_check_time = time.time() kill_reason = None + # For versions of psutil which do have STATUS_ZOMBIE: + # We'll prefer that, else fallback to the literal "zombie". + ZOMBIE_STRING = getattr(psutil, "STATUS_ZOMBIE", "zombie").lower() + while True: - # 1) If the process ended, break + # 1) If the process ended, break the loop if process.poll() is not None: break # 2) Read any line from stdout - line = None try: line = process.stdout.readline() except Exception: - pass + line = None if line: output_capture.write(line) @@ -411,29 +413,32 @@ def execute_code( if (time.time() - last_cpu_check_time) > 2.0: last_cpu_check_time = time.time() - # (a) Check if process is still considered "running" in psutil + # (a) If process is not running => presumably done or zombie if not proc_psutil.is_running(): - # If it’s not running, possibly a zombie or gone; break - kill_reason = "Process ended or is zombie" + kill_reason = "Process ended or unknown state" break - # (b) Check if status is zombie + # (b) Attempt to get the string status try: - if proc_psutil.status() == psutil.ZOMBIE: - kill_reason = "Process is zombie" - process.kill() - break + # e.g. "running", "sleeping", "zombie", etc. + status_str = proc_psutil.status().lower() except psutil.Error: - # If we fail to get status, we treat it as an error + # If we can't retrieve status, assume it's gone kill_reason = "Could not retrieve process status" process.kill() break + # If status is "zombie", kill it + if status_str == ZOMBIE_STRING: + kill_reason = "Process is zombie" + process.kill() + break + # (c) Try CPU usage try: cpu_usage = proc_psutil.cpu_percent(interval=0.1) - except (psutil.ZombieProcess, psutil.NoSuchProcess, psutil.AccessDenied): - kill_reason = "Process is zombie or gone (cpu_percent error)" + except (psutil.NoSuchProcess, psutil.AccessDenied): + kill_reason = "Process is gone or cannot be accessed" process.kill() break @@ -453,7 +458,7 @@ def execute_code( process.kill() break - time.sleep(0.05) + time.sleep(0.05) # Short pause # 6) Drain leftover stdout if process.poll() is not None: From 44adc339a12d857fd1b6050f7be8ea9b6dfe7d32 Mon Sep 17 00:00:00 2001 From: Taddeus Buica Date: Thu, 16 Jan 2025 17:30:11 +0200 Subject: [PATCH 10/10] removed unnecessary comments --- tools.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/tools.py b/tools.py index 9eaa4cc..0b592b8 100755 --- a/tools.py +++ b/tools.py @@ -1,7 +1,3 @@ -##################### -# tools.py -##################### - from utils import * import time @@ -23,10 +19,6 @@ import psutil import subprocess -############################################################################### -# HFDataSearch Class # -############################################################################### - class HFDataSearch: def __init__(self, like_thr=3, dwn_thr=50) -> None: """ @@ -192,10 +184,6 @@ def results_str(self, results): result_strs.append(res_str) return result_strs -############################################################################### -# SemanticScholarSearch Class # -############################################################################### - class SemanticScholarSearch: def __init__(self): self.sch_engine = SemanticScholar(retry=False) @@ -234,10 +222,6 @@ def retrieve_full_paper_text(self, query): """ pass -############################################################################### -# ArxivSearch Class # -############################################################################### - class ArxivSearch: def __init__(self): # Construct the default API client. @@ -337,17 +321,6 @@ def retrieve_full_paper_text(self, query): return pdf_text -############################################################################### -# execute_code Function # -############################################################################### -# -# - uses psutil to keep code running if it remains busy. -# - kills code if idle for too long or hits a hard time limit. -# - also gracefully handles "zombie" processes by catching exceptions -# or checking .is_running() / .status(). -# -############################################################################### - def execute_code( code_str, max_total_time=7200, # Hard limit on total runtime (seconds)