# btl_string.py (3.6 KB)
# author: David Harrison
  2. def split( s, delimiter = ' ', quote=['"',"'"], keep_quote = True):
  3. """analogous to str.split() except it supports quoted strings.
  4. Delimiter can be any positive length string.
  5. A quote begins on any character in 'quote', and ends on that
  6. same character. A quoted string is not split even if it
  7. contains character c or other quote characters in the quote
  8. argument.
  9. Iff keep_quote is true then quote's leading and trailing
  10. quote characters are left in the strings in the returned list."""
  11. assert type(s) == str
  12. assert type(delimiter) == str and len(delimiter) >= 1, "c='%s'" % c
  13. l = []
  14. sub = []
  15. quoted = None
  16. i = 0
  17. while i < len(s):
  18. c = s[i]
  19. # check for end-quote
  20. if quoted:
  21. if c == quoted:
  22. quoted = None
  23. if keep_quote:
  24. sub.append(c)
  25. else:
  26. sub.append(c)
  27. # check for start-quote.
  28. elif c in quote:
  29. quoted = c
  30. if keep_quote:
  31. sub.append(c)
  32. elif s[i:i+len(delimiter)] != delimiter:
  33. sub.append(c)
  34. else:
  35. i += (len(delimiter)-1)
  36. l.append("".join(sub))
  37. sub = []
  38. i += 1
  39. l.append("".join(sub))
  40. return l
  41. def remove(s,c):
  42. l = [i for i in s if i != c]
  43. return "".join(l)
  44. def printable(s):
  45. """make a string printable. Converts all non-printable ascii characters and all
  46. non-space whitespace to periods. This keeps a string to a fixed width when
  47. printing it. This is not meant for canonicalization. It is far more
  48. restrictive since it removes many things that might be representable.
  49. It is appropriate for generating debug output binary strings that might
  50. contain ascii substrings, like peer-id's. It explicitly excludes quotes
  51. and double quotes so that the string can be enclosed in quotes.
  52. """
  53. l = []
  54. for c in s:
  55. if ord(c) >= 0x20 and ord(c) < 0x7F and c != '"' and c != "'":
  56. l.append(c)
  57. else:
  58. l.append('.')
  59. return "".join(l)
  60. def str2(s, default = "<not str convertable>" ):
  61. """converts passed object to a printable string, to repr, or
  62. returns provided default in that order of precendence."""
  63. try:
  64. return printable(str(s))
  65. except:
  66. try:
  67. return repr(s)
  68. except:
  69. return default
  70. if __name__ == "__main__":
  71. assert split( "" ) == [''], split( "" )
  72. assert split( "a b c" ) == ['a','b','c'], split( "a b c" )
  73. assert split( "a" ) == ['a'], split( "a" )
  74. assert split( " a", ',' ) == [' a'], split( " a", ',')
  75. assert split( "a,b,c", ',' ) == ['a','b','c'], split( "a,b,c", ',' )
  76. assert split( "a,b,", ',' ) == ['a','b',''], split( "a,b,", ',' )
  77. assert split( "'a',b", ',' ) == ["'a'",'b'], split( "'a',b", ',' )
  78. assert split( "'a,b'", ',' ) == ["'a,b'"], split( "'a,b'", ',' )
  79. assert split( "a,'b,\"cd\",e',f", ',', keep_quote=False) == ['a', 'b,"cd",e', 'f']
  80. assert split( 'a,"b,\'cd\',e",f', ',', keep_quote=False) == ['a', "b,'cd',e", 'f']
  81. assert split( "a - b - c", " - " ) == ['a','b','c'], split( "a - b - c", " - " )
  82. s = "Aug 19 06:26:29 tracker-01 hypertracker.event - 6140 - INFO - ihash=ed25f"
  83. assert split( s, ' - ' ) == ['Aug 19 06:26:29 tracker-01 hypertracker.event',
  84. '6140', 'INFO', 'ihash=ed25f'], split( s, ' - ')
  85. assert str2('foo') == 'foo'
  86. assert str2(u'foo') == 'foo'
  87. assert str2(None) == "None"
  88. print "passed all tests"