-
Notifications
You must be signed in to change notification settings - Fork 1
feat: upgrade ergo-airflow plugin for Airflow 3.x compatibility #32
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
4aae0fb
48ed37e
dad0d9a
906fc68
69a58a4
e3200c7
10e400d
9b688f1
fb11ee6
6e5b2fd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -1,8 +1,7 @@ | ||||||
| from datetime import timedelta | ||||||
| from datetime import datetime, timedelta | ||||||
|
|
||||||
| from airflow import DAG | ||||||
| from airflow.utils import timezone | ||||||
| from airflow.utils.dates import days_ago | ||||||
|
|
||||||
| from ergo.config import Config | ||||||
| from ergo.operators.sqs.sqs_task_pusher import SqsTaskPusherOperator | ||||||
|
|
@@ -17,7 +16,7 @@ | |||||
| 'depends_on_past': False, | ||||||
| 'retries': 2, | ||||||
| 'retry_delay': timedelta(minutes=1), | ||||||
| 'start_date': days_ago(1), | ||||||
| 'start_date': datetime(2024, 1, 1), | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🏢 Use timezone-aware start_date A naive datetime can trigger scheduling warnings and run offsets; use Airflow’s timezone-aware datetime helper.
Suggested change
|
||||||
| 'priority_weight': 900, | ||||||
| } | ||||||
|
|
||||||
|
|
@@ -30,7 +29,7 @@ | |||||
| 'calipso_ergo_task_queuer', | ||||||
| default_args=default_args, | ||||||
| is_paused_upon_creation=False, | ||||||
| schedule_interval=timedelta(seconds=10), | ||||||
| schedule=timedelta(seconds=10), | ||||||
| catchup=False, | ||||||
| max_active_runs=max_concurrent_runs, | ||||||
| dagrun_timeout=timedelta(minutes=5) | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -1,9 +1,8 @@ | ||||||
| from datetime import timedelta | ||||||
| from datetime import datetime, timedelta | ||||||
|
|
||||||
| from airflow import DAG | ||||||
| from airflow.contrib.sensors.aws_sqs_sensor import SQSSensor | ||||||
| from airflow.providers.amazon.aws.sensors.sqs import SqsSensor | ||||||
| from airflow.utils import timezone | ||||||
| from airflow.utils.dates import days_ago | ||||||
|
|
||||||
| from ergo.config import Config | ||||||
| from ergo.operators.sqs.result_from_messages import \ | ||||||
|
|
@@ -16,7 +15,7 @@ | |||||
| 'depends_on_past': False, | ||||||
| 'retries': 10, | ||||||
| 'retry_delay': timedelta(seconds=30), | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🏢 Use timezone-aware start_date A naive datetime can trigger scheduling warnings and run offsets; use Airflow’s timezone-aware datetime helper.
Suggested change
|
||||||
| 'start_date': days_ago(1), | ||||||
| 'start_date': datetime(2024, 1, 1), | ||||||
| 'priority_weight': 900, | ||||||
| } | ||||||
|
|
||||||
|
|
@@ -27,12 +26,12 @@ | |||||
| 'ergo_job_collector', | ||||||
| default_args=default_args, | ||||||
| is_paused_upon_creation=False, | ||||||
| schedule_interval=timedelta(seconds=10), | ||||||
| schedule=timedelta(seconds=10), | ||||||
| catchup=False, | ||||||
| dagrun_timeout=timedelta(minutes=15), | ||||||
| max_active_runs=Config.max_runs_dag_job_collector | ||||||
| ) as dag: | ||||||
| sqs_collector = SQSSensor( | ||||||
| sqs_collector = SqsSensor( | ||||||
| task_id=TASK_ID_SQS_COLLECTOR, | ||||||
| sqs_queue=sqs_queue_url, | ||||||
| max_messages=10, | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -1,8 +1,7 @@ | ||||||
| from datetime import timedelta | ||||||
| from datetime import datetime, timedelta | ||||||
|
|
||||||
| from airflow import DAG | ||||||
| from airflow.utils import timezone | ||||||
| from airflow.utils.dates import days_ago | ||||||
|
|
||||||
| from ergo.config import Config | ||||||
| from ergo.operators.sqs.sqs_task_pusher import SqsTaskPusherOperator | ||||||
|
|
@@ -17,7 +16,7 @@ | |||||
| 'depends_on_past': False, | ||||||
| 'retries': 2, | ||||||
| 'retry_delay': timedelta(minutes=1), | ||||||
| 'start_date': days_ago(1), | ||||||
| 'start_date': datetime(2024, 1, 1), | ||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🏢 Use timezone-aware start_date A naive datetime can trigger scheduling warnings and run offsets; use Airflow’s timezone-aware datetime helper.
Suggested change
|
||||||
| 'priority_weight': 900, | ||||||
| } | ||||||
|
|
||||||
|
|
@@ -30,7 +29,7 @@ | |||||
| 'selenium_ergo_task_queuer', | ||||||
| default_args=default_args, | ||||||
| is_paused_upon_creation=False, | ||||||
| schedule_interval=timedelta(seconds=10), | ||||||
| schedule=timedelta(seconds=10), | ||||||
| catchup=False, | ||||||
| max_active_runs=max_concurrent_runs, | ||||||
| dagrun_timeout=timedelta(minutes=5) | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,5 +1,4 @@ | ||||||||||||||||||
| from airflow.models.baseoperator import BaseOperatorLink | ||||||||||||||||||
| from flask import url_for | ||||||||||||||||||
| from airflow.sdk import BaseOperatorLink | ||||||||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ✨ Import urlencode for safe query construction The link now builds a query string manually; import urlencode so parameters are encoded safely.
Suggested change
|
||||||||||||||||||
|
|
||||||||||||||||||
|
|
||||||||||||||||||
| class ErgoTaskDetailLink(BaseOperatorLink): | ||||||||||||||||||
|
|
@@ -9,9 +8,5 @@ class ErgoTaskDetailLink(BaseOperatorLink): | |||||||||||||||||
| name = 'Ergo' | ||||||||||||||||||
|
|
||||||||||||||||||
| def get_link(self, operator, dttm): | ||||||||||||||||||
| return url_for( | ||||||||||||||||||
| 'ErgoView.task_detail', | ||||||||||||||||||
| ti_task_id=operator.task_id, | ||||||||||||||||||
| ti_dag_id=operator.dag_id, | ||||||||||||||||||
| ti_execution_date=dttm | ||||||||||||||||||
| ) | ||||||||||||||||||
| # In Airflow 3, the www module is gone; return a simple URL path | ||||||||||||||||||
| return f'/ergo/task_detail?ti_task_id={operator.task_id}&ti_dag_id={operator.dag_id}&ti_execution_date={dttm}' | ||||||||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🏢 URL-encode task detail parameters dttm renders with spaces/timezone and can create malformed URLs. Build the query using urlencode and use an ISO string; also handle None to avoid parse failures in the view.
Suggested change
|
||||||||||||||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -54,7 +54,7 @@ def run_migrations_offline(): | |||||||||||||||||
| script output. | ||||||||||||||||||
|
|
||||||||||||||||||
| """ | ||||||||||||||||||
| url = config.get_main_option("sqlalchemy.url") | ||||||||||||||||||
| url = os.getenv('AIRFLOW__CORE__SQL_ALCHEMY_CONN', config.get_main_option("sqlalchemy.url")) | ||||||||||||||||||
| context.configure( | ||||||||||||||||||
| url=url, | ||||||||||||||||||
| target_metadata=target_metadata, | ||||||||||||||||||
|
|
@@ -75,8 +75,12 @@ def run_migrations_online(): | |||||||||||||||||
| and associate a connection with the context. | ||||||||||||||||||
|
|
||||||||||||||||||
| """ | ||||||||||||||||||
| cfg = config.get_section(config.config_ini_section) | ||||||||||||||||||
| db_url = os.getenv('AIRFLOW__CORE__SQL_ALCHEMY_CONN') | ||||||||||||||||||
| if db_url: | ||||||||||||||||||
| cfg['sqlalchemy.url'] = db_url | ||||||||||||||||||
| connectable = engine_from_config( | ||||||||||||||||||
| config.get_section(config.config_ini_section), | ||||||||||||||||||
| cfg, | ||||||||||||||||||
| prefix="sqlalchemy.", | ||||||||||||||||||
| poolclass=pool.NullPool, | ||||||||||||||||||
| ) | ||||||||||||||||||
|
Comment on lines
+83
to
86
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ✨ Guard against missing alembic config section config.get_section(...) can return None; assigning to cfg['sqlalchemy.url'] would raise a TypeError and prevent migrations. Use a default dict when the section is missing.
Suggested change
|
||||||||||||||||||
|
|
||||||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🏢 Use timezone-aware start_date
A naive datetime can trigger scheduling warnings and run offsets; use Airflow’s timezone-aware datetime helper.