Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion app/main/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
sentence_case,
)
from app.main.validators import (
CanEncode,
CannotContainURLsOrLinks,
CharactersNotAllowed,
CommonlyUsedPassword,
Expand Down Expand Up @@ -2532,11 +2533,16 @@ class CallbackForm(StripWhitespaceForm):
r"(?:#[\w\-._~%!$&'()*+,;=:@/?]*)?$",
message="Must be a valid https URL",
),
CanEncode(field_type="a web address"),
],
)
bearer_token = GovukPasswordField(
"Bearer token",
validators=[DataRequired(message="Cannot be empty"), Length(min=10, thing="the bearer token")],
validators=[
DataRequired(message="Cannot be empty"),
Length(min=10, thing="the bearer token"),
CanEncode(field_type="a bearer token"),
],
)

def validate(self, *args, **kwargs):
Expand Down
40 changes: 40 additions & 0 deletions app/main/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,46 @@
from app.utils.user import is_gov_user


class CanEncode:
    """
    Validates that the field data can be encoded into a specific character set.

    Every character of the field's data is encoded on its own with
    ``encoding``; the characters that fail are collected, de-duplicated and
    sorted so the resulting error message is deterministic.

    Args:
        encoding: Name of the codec each character must be encodable with.
        field_type: Human-readable description of the field, used in the
            default error message. Falls back to "this field" when ``None``.
        message: Optional custom error message. When given it replaces the
            generated message entirely.
    """

    def __init__(self, encoding="latin-1", field_type=None, message=None):
        self.encoding = encoding
        self.field_type = field_type
        self.message = message

    def _unsupported_characters(self, text):
        """Return the sorted, distinct characters of ``text`` that cannot be encoded."""
        unsupported = set()
        for char in text:
            # Encode one character at a time so every offender can be
            # reported, not just the first one the codec trips over.
            try:
                char.encode(self.encoding)
            except UnicodeEncodeError:
                unsupported.add(char)
        return sorted(unsupported)

    def __call__(self, form, field):
        """
        Raise ``ValidationError`` if ``field.data`` contains un-encodable characters.

        Empty or missing data is accepted without further checks. ``form`` is
        not used by this validator.
        """
        if not field.data:
            return

        unsupported_chars = self._unsupported_characters(field.data)
        if not unsupported_chars:
            return

        message = self.message
        if message is None:
            field_type = "this field" if self.field_type is None else self.field_type
            message = (
                "You cannot use {} in {}. You must use percent encoding if you want to include {}.".format(
                    formatted_list(unsupported_chars, conjunction="or", before_each="", after_each=""),
                    field_type,
                    "these characters" if len(unsupported_chars) > 1 else "this character",
                )
            )

        raise ValidationError(message)


class CommonlyUsedPassword:
def __init__(self, message=None):
if not message:
Expand Down
9 changes: 8 additions & 1 deletion tests/app/main/test_forms.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest

from app.main.forms import OrderableFieldsForm, StripWhitespaceStringField
from app.main.forms import CallbackForm, OrderableFieldsForm, StripWhitespaceStringField
from app.main.validators import CanEncode
from tests.conftest import set_config_values


Expand Down Expand Up @@ -41,3 +42,9 @@ class TestForm(OrderableFieldsForm):
with set_config_values(notify_admin, {"WTF_CSRF_ENABLED": True}):
form = TestForm()
assert [field.name for field in form] == ["csrf_token", "field2", "field1"]


def test_callbackform_has_can_encode_validators(notify_admin, client_request):
    """CallbackForm attaches a CanEncode validator to both the url and bearer_token fields."""
    form = CallbackForm()
    # Only the wiring is checked here; CanEncode's behaviour is covered by
    # the validator's own tests.
    for field in (form.url, form.bearer_token):
        assert any(isinstance(validator, CanEncode) for validator in field.validators)
37 changes: 37 additions & 0 deletions tests/app/main/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from wtforms import ValidationError

from app.main.validators import (
CanEncode,
CharactersNotAllowed,
MustContainAlphanumericCharacters,
NoCommasInPlaceHolders,
Expand Down Expand Up @@ -217,3 +218,39 @@ def test_string_cannot_contain_string_with_custom_error_message():

assert str(error.value) == "No sequences please"
assert mock_field.error_summary_messages == ["No sequences in %s please"]


@pytest.mark.parametrize(
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests are good, but they only check that the CanEncode validator works as expected. They don’t test that it’s being used on the bearer token field.

Copy link
Copy Markdown
Contributor Author

@whpearson whpearson Jun 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pushed up a version that checks whether CallbackForms have the CanEncode validators attached to the fields. I'd rather not test the functionality twice (to avoid duplication of effort if it changed).

I couldn't find examples of this kind of code (our forms tests are quite small), so pointers on how to do it properly appreciated.

"data, err_msg",
[
(
"📵 ghi",
"You cannot use 📵 in this field. You must use percent encoding if you want to include this character.",
),
(
"∆ abc 📲",
"You cannot use ∆ or 📲 in this field. You must use percent encoding if you want to include these characters.", # noqa
),
],
)
def test_can_encode_validation(data, err_msg, client_request):
    """A default CanEncode validator rejects un-encodable data with the expected message."""
    validate = CanEncode()

    with pytest.raises(ValidationError) as error:
        validate(None, _gen_mock_field(data))

    actual_message = str(error.value)
    assert actual_message == err_msg


def test_string_can_encode_with_custom_field_type():
    """The field_type passed to CanEncode replaces "this field" in the error message."""
    expected_message = "You cannot use ∆ or 📲 in a web address. You must use percent encoding if you want to include these characters."  # noqa
    mock_field = _gen_mock_field("∆ abc 📲", error_summary_messages=[])
    validate = CanEncode(field_type="a web address")

    with pytest.raises(ValidationError) as error:
        validate(None, mock_field)

    assert str(error.value) == expected_message


@pytest.mark.parametrize("string", ["", "Résumé", "München"])
def test_string_can_encode_does_not_raise(string):
    """Empty input and text within the default encoding pass validation silently."""
    validate = CanEncode()
    mock_field = _gen_mock_field(string)
    validate(None, mock_field)
Loading