Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Protect community from confusable homoglyphs #1470

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
11 changes: 11 additions & 0 deletions liberapay/models/community.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from psycopg2 import IntegrityError

from liberapay.exceptions import CommunityAlreadyExists, InvalidCommunityName
from liberapay.utils.unconfusable import unconfusable_string


name_maxlength = 40
Expand Down Expand Up @@ -38,8 +39,18 @@ def create(cls, name, creator_id, lang='mul'):
name = unicodedata.normalize('NFKC', name)
if name_re.match(name) is None:
raise InvalidCommunityName(name)

try:
with cls.db.get_cursor() as cursor:
unconfusable_name = unconfusable_string(name)
all_names = cursor.all("""
SELECT name
FROM communities
""")
for existing_name in all_names:
if unconfusable_name == unconfusable_string(existing_name):
raise CommunityAlreadyExists

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a very inefficient implementation, the more communities there are the slower it becomes. Also you're repeating an identical function call (cls._unconfusable(name)) in every loop iteration.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed here: 9b496bc

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You only fixed the repeated call, not the wider issue: this implementation is still inefficient.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which part is the most inefficient:

  • the database query?
  • the unconfusable lookup?
  • the unconfusable_string function?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fetching of all the existing community names from the database and the subsequent loop. That's linear complexity, O(n), with n being the number of existing communities.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should probably do something like this instead:

                possible_collisions = get_confusables(name)
                possible_collisions.append(name)
                collides_with = cursor.one("""
                    SELECT name
                      FROM communities
                     WHERE name IN %s
                     LIMIT 1
                """, (tuple(possible_collisions),))
                if collides_with:
                    raise CommunityAlreadyExists(collides_with)

p_id = cursor.one("""
INSERT INTO participants
(kind, status, join_time)
Expand Down
13 changes: 13 additions & 0 deletions liberapay/utils/unconfusable.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from confusable_homoglyphs import confusables

# Convert an Unicode string to its equivalent replacing all confusable homoglyphs
# to its common/latin equivalent
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a comment, not a docstring.

def unconfusable_string(s):
unconfusable_string = ''
for c in s:
confusable = confusables.is_confusable(c, preferred_aliases=['COMMON', 'LATIN'])
if confusable:
# if the character is confusable we replace it with the first prefered alias
c = confusable[0]['homoglyphs'][0]['c']
unconfusable_string += c
return unconfusable_string
4 changes: 4 additions & 0 deletions requirements_base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -243,3 +243,7 @@ cryptography==2.4.2 \
asn1crypto==0.24.0 \
--hash=sha256:2f1adbb7546ed199e3c90ef23ec95c5cf3585bac7d11fb7eb562a3fe89c64e87 \
--hash=sha256:9d5c20441baf0cb60a4ac34cc447c6c189024b6b4c6cd7877034f4965c464e49

confusable_homoglyphs==3.2.0 \
--hash=sha256:3b4a0d9fa510669498820c91a0bfc0c327568cecec90648cf3819d4a6fc6a751 \
--hash=sha256:e3ce611028d882b74a5faa69e3cbb5bd4dcd9f69936da6e73d33eda42c917944
MartinDelille marked this conversation as resolved.
Show resolved Hide resolved
18 changes: 17 additions & 1 deletion tests/py/test_communities.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json

from liberapay.exceptions import AuthRequired
from liberapay.exceptions import AuthRequired, CommunityAlreadyExists
from liberapay.models.community import Community
from liberapay.testing import Harness

Expand Down Expand Up @@ -101,6 +101,22 @@ def test_join_and_leave(self):
communities = self.bob.get_communities()
assert len(communities) == 0

def test_create_community_already_taken(self):
with self.assertRaises(CommunityAlreadyExists):
Community.create('test', self.alice.id)

def test_create_community_already_taken_is_case_insensitive(self):
with self.assertRaises(CommunityAlreadyExists):
Community.create('TeSt', self.alice.id)

def test_create_community_already_taken_with_confusable_homoglyphs(self):
latin_string = 'AlaskaJazz'
mixed_string = 'ΑlaskaJazz'

Community.create(latin_string, self.bob.id)
with self.assertRaises(CommunityAlreadyExists):
Community.create(mixed_string, self.alice.id)


class TestCommunityEdit(Harness):

Expand Down
13 changes: 12 additions & 1 deletion tests/py/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from liberapay import utils
from liberapay.i18n.currencies import Money, MoneyBasket
from liberapay.testing import Harness
from liberapay.utils import markdown, b64encode_s, b64decode_s, cbor
from liberapay.utils import markdown, b64encode_s, b64decode_s, cbor, unconfusable
from liberapay.wireup import CSP


Expand Down Expand Up @@ -244,3 +244,14 @@ def test_csp_handles_valueless_directives_correctly(self):
csp2 = CSP(csp)
assert csp == csp2
assert csp2.directives[b'upgrade-insecure-requests'] == b''

# Unconfusable
# ============

def test_unconfusable_string(self):
self.assertEqual('user2', unconfusable.unconfusable_string('user2'))
self.assertEqual('alice', unconfusable.unconfusable_string('alice'))
latin_string = 'AlaskaJazz'
mixed_string = 'ΑlaskaJazz'
self.assertNotEqual(latin_string, mixed_string)
self.assertEqual(latin_string, unconfusable.unconfusable_string(mixed_string))