Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[flake8]
# @see https://flake8.pycqa.org/en/latest/user/configuration.html?highlight=.flake8

exclude =
ckan
.git

# Extended output format.
format = pylint

# Show the source of errors.
show_source = True

max-complexity = 10
max-line-length = 127

# List ignore rules one per line.
ignore =
C901
W503
27 changes: 17 additions & 10 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
---
name: Tests
on: [push, pull_request]
on:
push:
pull_request:
branches:
- master

jobs:
lint:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -47,22 +53,23 @@ jobs:

steps:
- uses: actions/checkout@v3
timeout-minutes: 1

- name: Install requirements
run: |
pip install -r requirements.txt
pip install -r dev-requirements.txt
pip install -e .
# Replace default path to CKAN core config file with the one on the container
sed -i -e 's/use = config:.*/use = config:\/srv\/app\/src\/ckan\/test-core.ini/' test.ini
- name: Setup extension (CKAN >= 2.9)
if: ${{ matrix.ckan-version != '2.7' && matrix.ckan-version != '2.8' }}
run: |
ckan -c test.ini db init
ckan -c test.ini archiver init
- name: Setup extension (CKAN < 2.9)
if: ${{ matrix.ckan-version == '2.7' || matrix.ckan-version == '2.8' }}
timeout-minutes: 10

- name: Setup extension
run: |
paster --plugin=ckan db init -c test.ini
paster --plugin=ckanext-archiver archiver init -c test.ini
export CKAN_INI=test.ini
chmod u+x bin/ckan_cli
bin/ckan_cli db init
PASTER_PLUGIN=ckanext-archiver bin/ckan_cli archiver init

- name: Run tests
run: pytest --ckan-ini=test.ini --cov=ckanext.archiver --disable-warnings ckanext/archiver/tests
1 change: 1 addition & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ Config settings
* ``ckanext-archiver.max_content_length`` = the maximum size (in bytes) of files to archive (default ``50000000`` =50MB)
* ``ckanext-archiver.user_agent_string`` = identifies the archiver to servers it archives from
* ``ckanext-archiver.verify_https`` = true/false: whether to verify https connections, and therefore fail when https is specified in the URL but the connection does not verify.
* ``ckan.download_proxy`` = URL of an HTTP/S proxy server that will be used to download resources.

4. Nightly report generation

Expand Down
75 changes: 75 additions & 0 deletions bin/ckan_cli
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/bin/sh

# Call either 'ckan' (from CKAN >= 2.9) or 'paster' (from CKAN <= 2.8)
# with appropriate syntax, depending on what is present on the system.
# This is intended to smooth the upgrade process from 2.8 to 2.9.
# Eg:
#   ckan_cli jobs list
# could become either:
#   paster --plugin=ckan jobs list -c /etc/ckan/default/production.ini
# or:
#   ckan -c /etc/ckan/default/production.ini jobs list

# This script is aware of the VIRTUAL_ENV environment variable, and will
# attempt to respect it with similar behaviour to commands like 'pip'.
# Eg placing this script in a virtualenv 'bin' directory will cause it
# to call the 'ckan' or 'paster' command in that directory, while
# placing this script elsewhere will cause it to rely on the VIRTUAL_ENV
# variable, or if that is not set, the system PATH.

# Since the positioning of the CKAN configuration file is central to the
# differences between 'paster' and 'ckan', this script needs to be aware
# of the config file location. It will use the CKAN_INI environment
# variable if it exists, or default to /etc/ckan/default/production.ini.

# If 'paster' is being used, the default plugin is 'ckan'. A different
# plugin can be specified by setting the PASTER_PLUGIN environment
# variable. This variable is irrelevant if using the 'ckan' command.

# Extra arguments for the 'ckan' command can be supplied through the
# CLICK_ARGS environment variable; they are appended after the
# positional arguments and are ignored when 'paster' is used.

CKAN_INI="${CKAN_INI:-/etc/ckan/default/production.ini}"
PASTER_PLUGIN="${PASTER_PLUGIN:-ckan}"

# Start from a known state so that a COMMAND variable inherited from the
# caller's environment cannot be mistaken for a detected tool.
COMMAND=""

# First, look for a command alongside this file
ENV_DIR=$(dirname "$0")
if [ -f "$ENV_DIR/ckan" ]; then
    COMMAND=ckan
elif [ -f "$ENV_DIR/paster" ]; then
    COMMAND=paster
elif [ "$VIRTUAL_ENV" != "" ]; then
    # If command not found alongside this file, check the virtualenv
    ENV_DIR="$VIRTUAL_ENV/bin"
    if [ -f "$ENV_DIR/ckan" ]; then
        COMMAND=ckan
    elif [ -f "$ENV_DIR/paster" ]; then
        COMMAND=paster
    fi
else
    # if no virtualenv is active, try the system path
    # ('command -v' is the POSIX-specified lookup; 'which' is not
    # guaranteed to exist on minimal systems)
    if command -v ckan > /dev/null 2>&1; then
        ENV_DIR=$(dirname "$(command -v ckan)")
        COMMAND=ckan
    elif command -v paster > /dev/null 2>&1; then
        ENV_DIR=$(dirname "$(command -v paster)")
        COMMAND=paster
    else
        echo "Unable to locate 'ckan' or 'paster' command" >&2
        exit 1
    fi
fi

# From here on COMMAND is reused to hold the CKAN subcommand (the first
# positional argument) rather than the name of the tool.
if [ "$COMMAND" = "ckan" ]; then
    # adjust args to match ckan expectations
    # (printf rather than echo, so an argument that looks like an echo
    # option is passed through untouched)
    COMMAND=$(printf '%s\n' "$1" | sed -e 's/create-test-data/seed/')
    echo "Using 'ckan' command from $ENV_DIR with config ${CKAN_INI} to run $COMMAND..." >&2
    # Guard the shift: with no arguments some shells abort the script.
    if [ "$#" -gt 0 ]; then shift; fi
    # ${COMMAND:+...} passes the subcommand as one quoted word, or nothing
    # at all when no subcommand was given.
    # CLICK_ARGS is intentionally unquoted so it can carry several options.
    exec "$ENV_DIR/ckan" -c "${CKAN_INI}" ${COMMAND:+"$COMMAND"} "$@" $CLICK_ARGS
elif [ "$COMMAND" = "paster" ]; then
    # adjust args to match paster expectations
    COMMAND=$1
    echo "Using 'paster' command from $ENV_DIR with config ${CKAN_INI} to run $COMMAND..." >&2
    if [ "$#" -gt 0 ]; then shift; fi
    # Drop a leading 'show' argument before handing over to paster.
    # NOTE(review): presumably the paster equivalents do not take the
    # 'show' verb used by the 'ckan' CLI -- confirm.
    if [ "$1" = "show" ]; then shift; fi
    exec "$ENV_DIR/paster" --plugin="$PASTER_PLUGIN" ${COMMAND:+"$COMMAND"} "$@" -c "${CKAN_INI}"
else
    echo "Unable to locate 'ckan' or 'paster' command in $ENV_DIR" >&2
    exit 1
fi
2 changes: 2 additions & 0 deletions ckanext/archiver/bin/common.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# encoding: utf-8

from __future__ import print_function
import os
import ckan.plugins as p
Expand Down
4 changes: 2 additions & 2 deletions ckanext/archiver/bin/migrate_task_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def migrate(options):
archival = Archival.get_for_resource(res.id)
if archival:
changed = None
for field, value in list(fields.items()):
for field, value in fields.items():
if getattr(archival, field) != value:
if options.write:
setattr(archival, field, value)
Expand All @@ -107,7 +107,7 @@ def migrate(options):
else:
archival = Archival.create(res.id)
if options.write:
for field, value in list(fields.items()):
for field, value in fields.items():
setattr(archival, field, value)
model.Session.add(archival)
add_stat('Added to archival table', res, stats)
Expand Down
10 changes: 5 additions & 5 deletions ckanext/archiver/bin/running_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
package_stats.increment('deleted')
else:
package_stats.increment('not deleted')
print package_stats.report()
print(package_stats.report())
> deleted: 30
> not deleted: 70

Expand All @@ -26,7 +26,7 @@
package_stats.add('deleted', package.name)
else:
package_stats.add('not deleted', package.name)
print package_stats.report()
print(package_stats.report())
> deleted: 30 pollution-uk, flood-regions, river-quality, ...
> not deleted: 70 spending-bristol, ...

Expand All @@ -43,7 +43,7 @@ class StatsCount(dict):
report_value_limit = 150

def __init__(self, *args, **kwargs):
self._start_time = datetime.datetime.now()
self._start_time = datetime.datetime.utcnow()
super(StatsCount, self).__init__(*args, **kwargs)

def _init_category(self, category):
Expand All @@ -65,7 +65,7 @@ def report(self, indent=1, order_by_title=False, show_time_taken=True):
lines = []
indent_str = '\t' * indent
report_dict = dict()
for category in list(self.keys()):
for category in self.keys():
report_dict[category] = self.report_value(category)

if order_by_title:
Expand All @@ -81,7 +81,7 @@ def report(self, indent=1, order_by_title=False, show_time_taken=True):
lines = [indent_str + 'None']

if show_time_taken:
time_taken = datetime.datetime.now() - self._start_time
time_taken = datetime.datetime.utcnow() - self._start_time
lines.append(indent_str + 'Time taken (h:m:s): %s' % time_taken)
return '\n'.join(lines)

Expand Down
4 changes: 3 additions & 1 deletion ckanext/archiver/cli.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# encoding: utf-8

import click
from ckanext.archiver import utils
from . import utils


def get_commands():
Expand Down
2 changes: 2 additions & 0 deletions ckanext/archiver/command_celery.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# encoding: utf-8

from __future__ import print_function
from future import standard_library
import sys
Expand Down
9 changes: 4 additions & 5 deletions ckanext/archiver/commands.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
# encoding: utf-8

from __future__ import print_function
import logging
import sys

from ckan.lib.cli import CkanCommand

from ckanext.archiver import utils


REQUESTS_HEADER = {'content-type': 'application/json'}
from . import utils


class Archiver(CkanCommand):
Expand All @@ -27,7 +26,7 @@ class Archiver(CkanCommand):
package or group, if specified

paster archiver update-test [{package-name/id}|{group-name/id}]
- Does an archive in the current process i.e. avoiding Celery queue
- Does an archive in the current process i.e. avoiding worker queue
so that you can test on the command-line more easily.

paster archiver clean-status
Expand Down
6 changes: 4 additions & 2 deletions ckanext/archiver/lib.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# encoding: utf-8

from builtins import str
import logging
import ckan.plugins as p
Expand Down Expand Up @@ -31,14 +33,14 @@ def create_archiver_resource_task(resource, queue):

compat_enqueue('archiver.update_resource', update_resource, queue, [resource.id])

log.debug('Archival of resource put into celery queue %s: %s/%s url=%r',
log.debug('Archival of resource put into queue %s: %s/%s url=%r',
queue, package.name, resource.id, resource.url)


def create_archiver_package_task(package, queue):
compat_enqueue('archiver.update_package', update_package, queue, [package.id])

log.debug('Archival of package put into celery queue %s: %s',
log.debug('Archival of package put into queue %s: %s',
queue, package.name)


Expand Down
30 changes: 14 additions & 16 deletions ckanext/archiver/model.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import itertools
# encoding: utf-8

from builtins import str
from builtins import object
import uuid
from datetime import datetime
import uuid

from sqlalchemy import Column, MetaData
from sqlalchemy import types
Expand All @@ -27,29 +27,23 @@ def make_uuid():
# enum of all the archival statuses (singleton)
# NB Be very careful changing these status strings. They are also used in
# ckanext-qa tasks.py.
class Status(object):
class Status:
_instance = None

def __init__(self):
not_broken = {
self._by_id = {
# is_broken = False
0: 'Archived successfully',
1: 'Content has not changed',
}
broken = {
# is_broken = True
10: 'URL invalid',
11: 'URL request failed',
12: 'Download error',
}
not_sure = {
# is_broken = None i.e. not sure
21: 'Chose not to download',
22: 'Download failure',
23: 'System error during archival',
}
self._by_id = dict(itertools.chain(not_broken.items(), broken.items()))
self._by_id.update(not_sure)
self._by_text = dict((value, key)
for key, value in self._by_id.items())

Expand Down Expand Up @@ -86,6 +80,12 @@ def is_ok(cls, status_id):
False: 'Downloaded OK'}


def _get_status_by_id(status_id):
if status_id is None:
return None
return Status.by_id(status_id)


class Archival(Base):
"""
Details of the archival of resources. Has the filepath for successfully
Expand Down Expand Up @@ -118,7 +118,7 @@ class Archival(Base):
last_success = Column(types.DateTime)
failure_count = Column(types.Integer, default=0)

created = Column(types.DateTime, default=datetime.now)
created = Column(types.DateTime, default=datetime.utcnow)
updated = Column(types.DateTime)

def __repr__(self):
Expand Down Expand Up @@ -157,9 +157,7 @@ def create(cls, resource_id):

@property
def status(self):
if self.status_id is None:
return None
return Status.by_id(self.status_id)
return _get_status_by_id(self.status_id)

def as_dict(self):
context = {'model': model}
Expand Down Expand Up @@ -192,7 +190,7 @@ def aggregate_archivals_for_a_dataset(archivals):
archival_dict['reason'] = archival.reason

if archivals:
archival_dict['status'] = Status.by_id(archival_dict['status_id'])
archival_dict['status'] = _get_status_by_id(archival_dict['status_id'])
archival_dict['is_broken'] = \
Status.is_status_broken(archival_dict['status_id'])
return archival_dict
Expand Down
Loading