Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PICARD-1877: Support language for lyrics tags #1650

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 11 additions & 14 deletions picard/formats/id3.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@
encode_filename,
sanitize_date,
)
from picard.util.tags import parse_comment_tag
from picard.util.tags import (
create_lang_desc_tag,
parse_comment_tag,
parse_lang_desc_tag,
)


id3.GRP1 = compatid3.GRP1
Expand Down Expand Up @@ -324,9 +328,7 @@ def _load(self, filename):
for text in frame.text:
metadata.add(name, text)
elif frameid == 'USLT':
name = 'lyrics'
if frame.desc:
name += ':%s' % frame.desc
name = create_lang_desc_tag('lyrics', frame.lang, frame.desc)
metadata.add(name, frame.text)
elif frameid == 'UFID' and frame.owner == 'http://musicbrainz.org':
metadata['musicbrainz_recordingid'] = frame.data.decode('ascii', 'ignore')
Expand Down Expand Up @@ -444,12 +446,9 @@ def _save(self, filename, metadata):
else:
tags.add(id3.COMM(encoding=encoding, desc=desc, lang=lang, text=values))
elif name.startswith('lyrics:') or name == 'lyrics':
if ':' in name:
desc = name.split(':', 1)[1]
else:
desc = ''
(lang, desc) = parse_lang_desc_tag(name)
for value in values:
tags.add(id3.USLT(encoding=encoding, desc=desc, text=value))
tags.add(id3.USLT(encoding=encoding, desc=desc, lang=lang, text=value))
elif name in self._rtipl_roles:
for value in values:
tipl.people.append([self._rtipl_roles[name], value])
Expand Down Expand Up @@ -556,12 +555,10 @@ def _remove_deleted_tags(self, metadata, tags):
and frame.lang == lang):
del tags[key]
elif name.startswith('lyrics:') or name == 'lyrics':
if ':' in name:
desc = name.split(':', 1)[1]
else:
desc = ''
(lang, desc) = parse_lang_desc_tag(name)
for key, frame in list(tags.items()):
if frame.FrameID == 'USLT' and frame.desc == desc:
if (frame.FrameID == 'USLT' and frame.desc == desc
and frame.lang == lang):
del tags[key]
elif name in self._rtipl_roles:
role = self._rtipl_roles[name]
Expand Down
35 changes: 34 additions & 1 deletion picard/util/tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def display_tag_name(name):
def parse_comment_tag(name): # noqa: E302
"""
Parses a tag name like "comment:XXX:desc", where XXX is the language.
If language is not set ("comment:desc") "eng" is assumed as default.
If language is not set ("comment:desc") default_language is used.
Returns a (lang, desc) tuple.
"""
try:
Expand All @@ -159,3 +159,36 @@ def parse_comment_tag(name): # noqa: E302
lang = match.group(1)
desc = desc[4:]
return (lang, desc)


RE_LYRICS_TAG = re.compile('^(?P<name>[^:]+)(?::(?P<lang>[a-zA-Z]{3})?)?(?::(?P<desc>.*))?$')
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In case of 'lyrics:eng' it will return empty desc and lang='eng', but in case of 'lyrics:123' it will return empty lang and '123' as desc. Is this the expected behavior?

>>> match = RE_LYRICS_TAG.match('a:123')
>>> match.groups()
('a', None, '123')
>>> match = RE_LYRICS_TAG.match('a:eng')
>>> match.groups()
('a', 'eng', None)
>>> match = RE_LYRICS_TAG.match('a:eng:')
>>> match.groups()
('a', 'eng', '')

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It kind of was intentional, using the second element as language only if it is a valid 3 letter language code (as this is what is also supported in ID3). It also serves as a fallback if one should indeed use a lyrics:something tag with something being the description. But my thinking was also that lyrics with description are probably not that common, but setting the language for lyrics is more important. That's probably a bit different to how it is with comments!?

Anyway, this is really messy and I'm unsure how to proceed. Ideally I would like both comment and lyrics the same, ideally without breaking someones existing use case. But maybe we need to break things a bit here.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not against breaking changes if required to improve overall user experience on the long term. It just have to be well documented.

def parse_lang_desc_tag(name, default_language='xxx'): # noqa: E302
"""
Parses a tag name like with a language and description such as "lyrics:XXX:desc".

Language must be a 3 letter language code as defined in ISO639-2, with the special
value 'xxx' being used if the language is not specified. If language is not set ("lyrics"
or "lyrics::desc") default_language is used.
Language and description are optional.

Returns a (lang, desc) tuple.
"""
match = RE_LYRICS_TAG.match(name)
lang = default_language
desc = ''
if match:
lang = match.group('lang') or default_language
desc = match.group('desc') or ''
return (lang, desc)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps return lang.lower() here, to simplify comparisons elsewhere.



def create_lang_desc_tag(name, language='xxx', description='', default_language='xxx'):
name_parts = [name, ]
if language and language.lower() != default_language:
name_parts.append(language)
elif description:
# Add empty language part if description is also set
name_parts.append('')
if description:
name_parts.append(description)
return ':'.join(name_parts)
2 changes: 1 addition & 1 deletion test/formats/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ def test_delete_tags_with_empty_description(self):
@skipUnlessTestfile
def test_delete_tags_with_description(self):
for key in (
'comment:foo', 'comment:de:foo', 'performer:foo', 'lyrics:foo',
'comment:foo', 'comment:deu:foo', 'performer:foo', 'lyrics::foo', 'lyrics:jpn:foo'
'comment:a*', 'comment:a[', 'performer:(x)', 'performer: Ä é '
):
if not self.format.supports_tag(key):
Expand Down
19 changes: 17 additions & 2 deletions test/formats/test_id3.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,11 +333,26 @@ def test_ci_tags_preserve_case(self):
self.assertEqual(1, len(raw_metadata['TXXX:Replaygain_Album_Peak'].text))
self.assertNotIn('TXXX:REPLAYGAIN_ALBUM_PEAK', raw_metadata)

@skipUnlessTestfile
def test_lyrics_with_language(self):
metadata = Metadata({'lyrics:jpn': 'bar'})
loaded_metadata = save_and_load_metadata(self.filename, metadata)
self.assertIn('lyrics:jpn', loaded_metadata)
self.assertEqual(metadata['lyrics:jpn'], loaded_metadata['lyrics:jpn'])

@skipUnlessTestfile
def test_lyrics_with_description(self):
metadata = Metadata({'lyrics:foo': 'bar'})
metadata = Metadata({'lyrics::foo': 'bar'})
loaded_metadata = save_and_load_metadata(self.filename, metadata)
self.assertIn('lyrics::foo', loaded_metadata)
self.assertEqual(metadata['lyrics::foo'], loaded_metadata['lyrics::foo'])

@skipUnlessTestfile
def test_lyrics_with_language_and_description(self):
metadata = Metadata({'lyrics:fra:foo': 'bar'})
loaded_metadata = save_and_load_metadata(self.filename, metadata)
self.assertEqual(metadata['lyrics:foo'], loaded_metadata['lyrics:foo'])
self.assertIn('lyrics:fra:foo', loaded_metadata)
self.assertEqual(metadata['lyrics:fra:foo'], loaded_metadata['lyrics:fra:foo'])

@skipUnlessTestfile
def test_invalid_track_and_discnumber(self):
Expand Down
18 changes: 18 additions & 0 deletions test/test_util_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@
from test.picardtestcase import PicardTestCase

from picard.util.tags import (
create_lang_desc_tag,
display_tag_name,
parse_comment_tag,
parse_lang_desc_tag,
)


Expand All @@ -37,3 +39,19 @@ def test_parse_comment_tag(self):
self.assertEqual(('XXX', 'foo'), parse_comment_tag('comment:XXX:foo'))
self.assertEqual(('eng', 'foo'), parse_comment_tag('comment:foo'))
self.assertEqual(('eng', ''), parse_comment_tag('comment'))

def test_parse_lang_desc_tag(self):
self.assertEqual(('jpn', ''), parse_lang_desc_tag('lyrics:jpn'))
self.assertEqual(('jpn', ''), parse_lang_desc_tag('lyrics:jpn:'))
self.assertEqual(('jpn', 'foo'), parse_lang_desc_tag('lyrics:jpn:foo'))
self.assertEqual(('xxx', 'foo'), parse_lang_desc_tag('lyrics::foo'))
self.assertEqual(('xxx', 'notalanguage:foo'), parse_lang_desc_tag('lyrics:notalanguage:foo'))

def test_create_lang_desc_tag(self):
self.assertEqual('comment', create_lang_desc_tag('comment'))
self.assertEqual('comment:eng', create_lang_desc_tag('comment', language='eng'))
self.assertEqual('comment::foo', create_lang_desc_tag('comment', description='foo'))
self.assertEqual('lyrics:jpn:foo', create_lang_desc_tag(
'lyrics', language='jpn', description='foo'))
self.assertEqual('lyrics::foo', create_lang_desc_tag(
'lyrics', language='jpn', description='foo', default_language='jpn'))