Logo Search packages:      
Sourcecode: wapiti version File versions  Download package

def BeautifulSoup::UnicodeDammit::_toUnicode (   self,
  data,
  encoding 
) [private]

Given a string and its encoding, decodes the string into Unicode.
%encoding is a string recognized by encodings.aliases

Definition at line 1641 of file BeautifulSoup.py.

01641                                         :
        '''Given a string and its encoding, decodes the string into Unicode.
        %encoding is a string recognized by encodings.aliases'''

        # strip Byte Order Mark (if present)
        if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
               and (data[2:4] != '\x00\x00'):
            encoding = 'utf-16be'
            data = data[2:]
        elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \
                 and (data[2:4] != '\x00\x00'):
            encoding = 'utf-16le'
            data = data[2:]
        elif data[:3] == '\xef\xbb\xbf':
            encoding = 'utf-8'
            data = data[3:]
        elif data[:4] == '\x00\x00\xfe\xff':
            encoding = 'utf-32be'
            data = data[4:]
        elif data[:4] == '\xff\xfe\x00\x00':
            encoding = 'utf-32le'
            data = data[4:]
        newdata = unicode(data, encoding)
        return newdata
    
    def _detectEncoding(self, xml_data):


Generated by  Doxygen 1.6.0   Back to index