Skip to content
This repository was archived by the owner on Jun 3, 2023. It is now read-only.

Handle marshaling and emitting unicode encoded byte strings #16

@jashugan

Description

@jashugan

I'm having problems encoding Unicode correctly when using transit. I would expect transit's JSON writer to handle strings the same way the standard `json` encoder does. Note below that the json encoder produces the same output for all three variations of the input (a unicode literal, a plain str literal, and a UTF-8-encoded byte string). The transit encoder produces different output from the json encoder on the first variation (the unicode literal, whose non-ASCII characters are emitted without `\u` escapes) and fails with a UnicodeDecodeError on the last two variations (the byte strings).

In [1]: import json

In [2]: json.dumps(u'Av. Zañartu 1482, Ñuñoa, Santiago, Chile, 7780272')
Out[2]: '"Av. Za\\u00f1artu 1482, \\u00d1u\\u00f1oa, Santiago, Chile, 7780272"'

In [3]: json.dumps('Av. Zañartu 1482, Ñuñoa, Santiago, Chile, 7780272')
Out[3]: '"Av. Za\\u00f1artu 1482, \\u00d1u\\u00f1oa, Santiago, Chile, 7780272"'

In [4]: json.dumps(u'Av. Zañartu 1482, Ñuñoa, Santiago, Chile, 7780272'.encode('utf-8'))
Out[4]: '"Av. Za\\u00f1artu 1482, \\u00d1u\\u00f1oa, Santiago, Chile, 7780272"'

In [5]: from transit.writer import Writer

In [6]: from StringIO import StringIO

In [9]: def transit_dumps(value):
   ...:     io = StringIO()
   ...:     writer = Writer(io, 'json')
   ...:     writer.write(value)
   ...:     return io.getvalue()
   ...: 

In [10]: transit_dumps(u'Av. Zañartu 1482, Ñuñoa, Santiago, Chile, 7780272')
Out[10]: u'["~#\'","Av. Za\xf1artu 1482, \xd1u\xf1oa, Santiago, Chile, 7780272"]'

In [11]: transit_dumps(u'Av. Zañartu 1482, Ñuñoa, Santiago, Chile, 7780272'.encode('utf-8'))
---------------------------------------------------------------------------
UnicodeDecodeError                        Traceback (most recent call last)
<ipython-input-11-91a52539e1c8> in <module>()
----> 1 transit_dumps(u'Av. Zañartu 1482, Ñuñoa, Santiago, Chile, 7780272'.encode('utf-8'))

<ipython-input-9-f0149f39269b> in transit_dumps(value)
      2     io = StringIO()
      3     writer = Writer(io, 'json')
----> 4     writer.write(value)
      5     return io.getvalue()
      6 

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in write(self, obj)
     42         the 'io' source.
     43         """
---> 44         self.marshaler.marshal_top(obj)
     45 
     46     def register(self, obj_type, handler_class):

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in marshal_top(self, obj, cache)
    196         if tag:
    197             if len(tag) == 1:
--> 198                 self.marshal(TaggedValue(QUOTE, obj), False, cache)
    199             else:
    200                 self.marshal(obj, False, cache)

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in marshal(self, obj, as_map_key, cache)
    179 
    180         if f:
--> 181             f(self, rep, as_map_key, cache)
    182         else:
    183             self.emit_encoded(tag, handler, obj, as_map_key, cache)

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in <lambda>(self, rep, _, cache)
    227                     "n": lambda self, rep, as_map_key, cache: Marshaler.emit_int(self, "n", rep, as_map_key, cache),
    228                     "d": Marshaler.emit_double,
--> 229                     "'": lambda self, rep, _, cache: Marshaler.emit_tagged(self, "'", rep, cache),
    230                     "array": Marshaler.emit_array,
    231                     "map": Marshaler.dispatch_map}

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in emit_tagged(self, tag, rep, cache)
    141         self.emit_array_start(2)
    142         self.emit_string(ESC, "#", tag, False, cache)
--> 143         self.marshal(rep, False, cache)
    144         self.emit_array_end()
    145 

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in marshal(self, obj, as_map_key, cache)
    179 
    180         if f:
--> 181             f(self, rep, as_map_key, cache)
    182         else:
    183             self.emit_encoded(tag, handler, obj, as_map_key, cache)

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in <lambda>(self, rep, as_map_key, cache)
    223 marshal_dispatch = {"_": Marshaler.emit_nil,
    224                     "?": Marshaler.emit_boolean,
--> 225                     "s": lambda self, rep, as_map_key, cache: Marshaler.emit_string(self, "", "", escape(rep), as_map_key, cache),
    226                     "i": lambda self, rep, as_map_key, cache: Marshaler.emit_int(self, "i", rep, as_map_key, cache),
    227                     "n": lambda self, rep, as_map_key, cache: Marshaler.emit_int(self, "n", rep, as_map_key, cache),

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in emit_string(self, prefix, tag, string, as_map_key, cache)
    106         #if "cache_enabled" in self.opts and is_cacheable(encoded, as_map_key):
    107         #    return self.emit_object(cache.value_to_key[encoded], as_map_key)
--> 108         return self.emit_object(encoded, as_map_key)
    109 
    110     def emit_boolean(self, b, as_map_key, cache):

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in emit_object(self, obj, as_map_key)
    357             self.io.write(u"".join([(c.encode("unicode_escape"))
    358                                     if c in JSON_ESCAPED_CHARS
--> 359                                     else c for c in obj]).replace("\"", "\\\""))
    360             self.io.write(u"\"")
    361         elif tp is int or tp is long or tp is float:

UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)

In [12]: transit_dumps('Av. Zañartu 1482, Ñuñoa, Santiago, Chile, 7780272')
---------------------------------------------------------------------------
UnicodeDecodeError                        Traceback (most recent call last)
<ipython-input-12-e53e995c1ef3> in <module>()
----> 1 transit_dumps('Av. Zañartu 1482, Ñuñoa, Santiago, Chile, 7780272')

<ipython-input-9-f0149f39269b> in transit_dumps(value)
      2     io = StringIO()
      3     writer = Writer(io, 'json')
----> 4     writer.write(value)
      5     return io.getvalue()
      6 

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in write(self, obj)
     42         the 'io' source.
     43         """
---> 44         self.marshaler.marshal_top(obj)
     45 
     46     def register(self, obj_type, handler_class):

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in marshal_top(self, obj, cache)
    196         if tag:
    197             if len(tag) == 1:
--> 198                 self.marshal(TaggedValue(QUOTE, obj), False, cache)
    199             else:
    200                 self.marshal(obj, False, cache)

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in marshal(self, obj, as_map_key, cache)
    179 
    180         if f:
--> 181             f(self, rep, as_map_key, cache)
    182         else:
    183             self.emit_encoded(tag, handler, obj, as_map_key, cache)

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in <lambda>(self, rep, _, cache)
    227                     "n": lambda self, rep, as_map_key, cache: Marshaler.emit_int(self, "n", rep, as_map_key, cache),
    228                     "d": Marshaler.emit_double,
--> 229                     "'": lambda self, rep, _, cache: Marshaler.emit_tagged(self, "'", rep, cache),
    230                     "array": Marshaler.emit_array,
    231                     "map": Marshaler.dispatch_map}

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in emit_tagged(self, tag, rep, cache)
    141         self.emit_array_start(2)
    142         self.emit_string(ESC, "#", tag, False, cache)
--> 143         self.marshal(rep, False, cache)
    144         self.emit_array_end()
    145 

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in marshal(self, obj, as_map_key, cache)
    179 
    180         if f:
--> 181             f(self, rep, as_map_key, cache)
    182         else:
    183             self.emit_encoded(tag, handler, obj, as_map_key, cache)

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in <lambda>(self, rep, as_map_key, cache)
    223 marshal_dispatch = {"_": Marshaler.emit_nil,
    224                     "?": Marshaler.emit_boolean,
--> 225                     "s": lambda self, rep, as_map_key, cache: Marshaler.emit_string(self, "", "", escape(rep), as_map_key, cache),
    226                     "i": lambda self, rep, as_map_key, cache: Marshaler.emit_int(self, "i", rep, as_map_key, cache),
    227                     "n": lambda self, rep, as_map_key, cache: Marshaler.emit_int(self, "n", rep, as_map_key, cache),

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in emit_string(self, prefix, tag, string, as_map_key, cache)
    106         #if "cache_enabled" in self.opts and is_cacheable(encoded, as_map_key):
    107         #    return self.emit_object(cache.value_to_key[encoded], as_map_key)
--> 108         return self.emit_object(encoded, as_map_key)
    109 
    110     def emit_boolean(self, b, as_map_key, cache):

/Users/nithin/.virtualenvs/weblims/lib/python2.7/site-packages/transit/writer.pyc in emit_object(self, obj, as_map_key)
    357             self.io.write(u"".join([(c.encode("unicode_escape"))
    358                                     if c in JSON_ESCAPED_CHARS
--> 359                                     else c for c in obj]).replace("\"", "\\\""))
    360             self.io.write(u"\"")
    361         elif tp is int or tp is long or tp is float:

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions