Source code for chardetails.core

#! /usr/bin/env python
# -*- coding: utf-8 -*-
# Unicode Character Details
# Copyright 2008-2010 Santhosh Thottingal <santhosh.thottingal@gmail.com>
# http://www.smc.org.in
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# If you find any bugs or have any suggestions email:
# santhosh.thottingal@gmail.com
# URL: http://www.smc.org.in

import os
import unicodedata

__all__ = ['CharDetails', 'getInstance']


class CharDetails:
    """Report Unicode details for each character of a given string."""

    def getdetails(self, text):
        """Give details of all characters in the given string.

        :param text: The unicode string to be examined.
        :type text: str.
        :returns: dictionary with one entry per distinct character
            (mapping that character to a dictionary of its properties)
            plus a ``'Characters'`` entry holding the characters of
            *text* as a list, in order and including repeats.

        Every per-character dictionary contains the keys ``'Name'``,
        ``'HTML Entity'``, ``'Code point'``, ``'Alphabet'``, ``'Digit'``,
        ``'AlphaNumeric'`` and ``'Canonical Decomposition'``.  The keys
        ``'Numeric Value'`` and ``'Decimal Value'`` are present only for
        characters that have such a value in the Unicode database.
        Characters without a Unicode name (for example control
        characters) get an empty string as ``'Name'``.

        ::

            >>> from chardetails.core import getInstance
            >>> details = getInstance().getdetails(u"5")
            >>> details['Characters'] == [u'5']
            True
            >>> details[u'5']['Name']
            'DIGIT FIVE'
            >>> details[u'5']['HTML Entity']
            '53'
            >>> details[u'5']['Decimal Value']
            5
            >>> details[u'5']['Digit']
            'True'

        """
        chardetails = {}
        for character in text:
            chardetails[character] = self._describe(character)
        chardetails['Characters'] = list(text)
        return chardetails

    def _describe(self, character):
        """Build the property dictionary for a single character."""
        details = {}
        # Supplying a default keeps unnamed characters (e.g. u"\n") from
        # raising ValueError and aborting the whole lookup.
        details['Name'] = unicodedata.name(character, '')
        details['HTML Entity'] = str(ord(character))
        details['Code point'] = repr(character)
        # numeric() and decimal() raise ValueError when the character has
        # no such value; in that case the key is simply left out.
        try:
            details['Numeric Value'] = unicodedata.numeric(character)
        except ValueError:
            pass
        try:
            details['Decimal Value'] = unicodedata.decimal(character)
        except ValueError:
            pass
        details['Alphabet'] = str(character.isalpha())
        # 'Digit' has always been reported as the string form of isdigit().
        details['Digit'] = str(character.isdigit())
        details['AlphaNumeric'] = str(character.isalnum())
        details['Canonical Decomposition'] = \
            unicodedata.decomposition(character)
        return details

    def get_module_name(self):
        """Return the module's name."""
        return "Unicode Character Details"

    def get_info(self):
        """Give info on the module."""
        return "Shows the Unicode Character Details of a given character"
def getInstance():
    """Create and return a fresh :class:`CharDetails` object."""
    instance = CharDetails()
    return instance

Related Topics

This Page