Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
### 1.0.3

* Fix UCS-2 encoding: fixes #49 and #53

### 1.0.2

* Add `tox.ini`, support `2.6`, `2.7`, `3.4`, `3.5`, `3.6` and `3.7`
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name='python-smpplib',
version='1.0.2',
version='1.0.3',
url='https://github.com/podshumok/python-smpplib',
description='SMPP library for python',
long_description=open('README.md', 'rt').read(),
Expand Down
22 changes: 12 additions & 10 deletions smpplib/consts.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import six

EMPTY_STRING = six.b('')
NULL_STRING = six.b('\0')
EMPTY_STRING = b''
NULL_STRING = b'\0'


# Message part lengths in different encodings.
SEVENBIT_SIZE = 160
EIGHTBIT_SIZE = 140
UCS2_SIZE = 70
SEVENBIT_MP_SIZE = SEVENBIT_SIZE - 7
EIGHTBIT_MP_SIZE = EIGHTBIT_SIZE - 6
UCS2_MP_SIZE = UCS2_SIZE - 3
# SMPP 3.4, 2.2.1.2
SEVENBIT_LENGTH = 160
EIGHTBIT_LENGTH = 140
UCS2_LENGTH = 70

MULTIPART_HEADER_SIZE = 6

SEVENBIT_PART_SIZE = SEVENBIT_LENGTH - 7 # TODO: where does 7 come from?
EIGHTBIT_PART_SIZE = 140 - MULTIPART_HEADER_SIZE
UCS2_PART_SIZE = 140 - MULTIPART_HEADER_SIZE # must be an even number anyway


# SMPP error codes.
Expand Down
14 changes: 8 additions & 6 deletions smpplib/gsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,21 @@ def make_parts(text, encoding=consts.SMPP_ENCODING_DEFAULT):
"""Returns tuple(parts, encoding, esm_class)"""
try:
# Try to encode with the user-defined encoding first.
encode, split_size, part_size = ENCODINGS[encoding]
encode, split_length, part_size = ENCODINGS[encoding]
encoded_text = encode(text)
except KeyError:
raise NotImplementedError('encoding is not supported: %s' % encoding)
except UnicodeError:
# Fallback to UCS-2.
encoding = consts.SMPP_ENCODING_ISO10646
encode, split_size, part_size = ENCODINGS[encoding]
encode, split_length, part_size = ENCODINGS[encoding]
encoded_text = encode(text)

if len(text) > split_size:
if len(text) > split_length:
# Split the text into well-formed parts.
esm_class = consts.SMPP_GSMFEAT_UDHI
# FIXME: 7-bit encoding has variable-length characters.
# FIXME: it means that a character may be broken by splitting.
Copy link
Collaborator Author

@eigenein eigenein Oct 15, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just realized there's another issue too. We check len(text), but later we split the encoded byte string. This means that 2-byte characters are counted as if they were 1-octet characters.

parts = make_parts_encoded(encoded_text, part_size)
else:
# Normal message.
Expand Down Expand Up @@ -56,9 +58,9 @@ def gsm_encode(plaintext):
# Map each SMPP encoding to a tuple of (encode function, maximum single-message size, multipart part size).
# Add a new entry here should you need to use another encoding.
ENCODINGS = {
consts.SMPP_ENCODING_DEFAULT: (gsm_encode, consts.SEVENBIT_SIZE, consts.SEVENBIT_MP_SIZE),
consts.SMPP_ENCODING_ISO88591: (lambda text: text.encode('iso-8859-1'), consts.EIGHTBIT_SIZE, consts.EIGHTBIT_MP_SIZE),
consts.SMPP_ENCODING_ISO10646: (lambda text: text.encode('utf-16-be'), consts.UCS2_SIZE, consts.UCS2_MP_SIZE),
consts.SMPP_ENCODING_DEFAULT: (gsm_encode, consts.SEVENBIT_LENGTH, consts.SEVENBIT_PART_SIZE),
consts.SMPP_ENCODING_ISO88591: (lambda text: text.encode('iso-8859-1'), consts.EIGHTBIT_LENGTH, consts.EIGHTBIT_PART_SIZE),
consts.SMPP_ENCODING_ISO10646: (lambda text: text.encode('utf-16-be'), consts.UCS2_LENGTH, consts.UCS2_PART_SIZE),
}


Expand Down
28 changes: 18 additions & 10 deletions tests/test_gsm.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
# -*- coding: utf8 -*-

import mock
import pytest
from pytest import mark, raises

from smpplib import consts
from smpplib.gsm import gsm_encode, make_parts, make_parts_encoded


@pytest.mark.parametrize('plaintext, encoded_text', [
@mark.parametrize('plaintext, encoded_text', [
(u'@', b'\x00'),
(u'^', b'\x1B\x14'),
])
def test_gsm_encode(plaintext, encoded_text):
assert gsm_encode(plaintext) == encoded_text


@pytest.mark.parametrize('plaintext', [
@mark.parametrize('plaintext', [
(u'Ая',),
])
def test_gsm_encode_unicode_error(plaintext):
with pytest.raises(UnicodeError):
with raises(UnicodeError):
gsm_encode(plaintext)


@pytest.mark.parametrize('plaintext, encoding, expected_parts, expected_encoding', [
@mark.parametrize('plaintext, encoding, expected_parts, expected_encoding', [
(u'@', consts.SMPP_ENCODING_DEFAULT, [b'\x00'], consts.SMPP_ENCODING_DEFAULT),
(u'Ая', consts.SMPP_ENCODING_DEFAULT, [b'\x04\x10\x04O'], consts.SMPP_ENCODING_ISO10646),
(u'é', consts.SMPP_ENCODING_ISO88591, [b'\xe9'], consts.SMPP_ENCODING_ISO88591),
Expand All @@ -32,10 +32,10 @@ def test_make_parts_single(plaintext, encoding, expected_parts, expected_encodin
assert make_parts(plaintext, encoding) == (expected_parts, expected_encoding, consts.SMPP_MSGTYPE_DEFAULT)


@pytest.mark.parametrize('plaintext, expected', [
(u'@' * consts.SEVENBIT_MP_SIZE * 2, [
b'\x05\x00\x03\x42\x02\x01' + b'\x00' * consts.SEVENBIT_MP_SIZE,
b'\x05\x00\x03\x42\x02\x02' + b'\x00' * consts.SEVENBIT_MP_SIZE,
@mark.parametrize('plaintext, expected', [
(u'@' * consts.SEVENBIT_PART_SIZE * 2, [
b'\x05\x00\x03\x42\x02\x01' + b'\x00' * consts.SEVENBIT_PART_SIZE,
b'\x05\x00\x03\x42\x02\x02' + b'\x00' * consts.SEVENBIT_PART_SIZE,
]),
])
def test_make_parts_multiple(plaintext, expected):
Expand All @@ -44,11 +44,19 @@ def test_make_parts_multiple(plaintext, expected):
assert make_parts(plaintext) == (expected, consts.SMPP_ENCODING_DEFAULT, consts.SMPP_GSMFEAT_UDHI)


@pytest.mark.parametrize('encoded_text, part_size, expected', [
@mark.parametrize('encoded_text, part_size, expected', [
(b'12345', 5, [b'\x05\x00\x03\x42\x01\x0112345']),
(b'12345', 2, [b'\x05\x00\x03\x42\x03\x0112', b'\x05\x00\x03\x42\x03\x0234', b'\x05\x00\x03\x42\x03\x035']),
])
def test_make_parts_encoded(encoded_text, part_size, expected):
with mock.patch('random.randint') as randint:
randint.return_value = 0x42
assert make_parts_encoded(encoded_text, part_size) == expected


@mark.parametrize('text, expected', [
(u'Привет мир!\n' * 10, 2),
])
def test_part_number(text, expected):
parts, _, _ = make_parts(text)
assert len(parts) == expected