statement_splitter.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. #
  2. # Copyright (C) 2009-2020 the sqlparse authors and contributors
  3. # <see AUTHORS file>
  4. #
  5. # This module is part of python-sqlparse and is released under
  6. # the BSD License: https://opensource.org/licenses/BSD-3-Clause
  7. from sqlparse import sql, tokens as T
  8. class StatementSplitter:
  9. """Filter that split stream at individual statements"""
  10. def __init__(self):
  11. self._reset()
  12. def _reset(self):
  13. """Set the filter attributes to its default values"""
  14. self._in_declare = False
  15. self._in_case = False
  16. self._is_create = False
  17. self._begin_depth = 0
  18. self._seen_begin = False
  19. self.consume_ws = False
  20. self.tokens = []
  21. self.level = 0
  22. def _change_splitlevel(self, ttype, value):
  23. """Get the new split level (increase, decrease or remain equal)"""
  24. # parenthesis increase/decrease a level
  25. if ttype is T.Punctuation and value == '(':
  26. return 1
  27. elif ttype is T.Punctuation and value == ')':
  28. return -1
  29. elif ttype not in T.Keyword: # if normal token return
  30. return 0
  31. # Everything after here is ttype = T.Keyword
  32. # Also to note, once entered an If statement you are done and basically
  33. # returning
  34. unified = value.upper()
  35. # three keywords begin with CREATE, but only one of them is DDL
  36. # DDL Create though can contain more words such as "or replace"
  37. if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
  38. self._is_create = True
  39. return 0
  40. # can have nested declare inside of being...
  41. if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
  42. self._in_declare = True
  43. return 1
  44. if unified == 'BEGIN':
  45. self._begin_depth += 1
  46. self._seen_begin = True
  47. if self._is_create:
  48. # FIXME(andi): This makes no sense. ## this comment neither
  49. return 1
  50. return 0
  51. # Issue826: If we see a transaction keyword after BEGIN,
  52. # it's a transaction statement, not a block.
  53. if self._seen_begin and \
  54. (ttype is T.Keyword or ttype is T.Name) and \
  55. unified in ('TRANSACTION', 'WORK', 'TRAN',
  56. 'DISTRIBUTED', 'DEFERRED',
  57. 'IMMEDIATE', 'EXCLUSIVE'):
  58. self._begin_depth = max(0, self._begin_depth - 1)
  59. self._seen_begin = False
  60. return 0
  61. # BEGIN and CASE/WHEN both end with END
  62. if unified == 'END':
  63. if not self._in_case:
  64. self._begin_depth = max(0, self._begin_depth - 1)
  65. else:
  66. self._in_case = False
  67. return -1
  68. if (unified in ('IF', 'FOR', 'WHILE', 'CASE')
  69. and self._is_create and self._begin_depth > 0):
  70. if unified == 'CASE':
  71. self._in_case = True
  72. return 1
  73. if unified in ('END IF', 'END FOR', 'END WHILE'):
  74. return -1
  75. # Default
  76. return 0
  77. def process(self, stream):
  78. """Process the stream"""
  79. EOS_TTYPE = T.Whitespace, T.Comment.Single
  80. # Run over all stream tokens
  81. for ttype, value in stream:
  82. # Yield token if we finished a statement and there's no whitespaces
  83. # It will count newline token as a non whitespace. In this context
  84. # whitespace ignores newlines.
  85. # why don't multi line comments also count?
  86. if self.consume_ws and ttype not in EOS_TTYPE:
  87. yield sql.Statement(self.tokens)
  88. # Reset filter and prepare to process next statement
  89. self._reset()
  90. # Change current split level (increase, decrease or remain equal)
  91. self.level += self._change_splitlevel(ttype, value)
  92. # Append the token to the current statement
  93. self.tokens.append(sql.Token(ttype, value))
  94. # Check if we get the end of a statement
  95. # Issue762: Allow GO (or "GO 2") as statement splitter.
  96. # When implementing a language toggle, it's not only to add
  97. # keywords it's also to change some rules, like this splitting
  98. # rule.
  99. # Issue809: Ignore semicolons inside BEGIN...END blocks, but handle
  100. # standalone BEGIN; as a transaction statement
  101. if ttype is T.Punctuation and value == ';':
  102. # If we just saw BEGIN; then this is a transaction BEGIN,
  103. # not a BEGIN...END block, so decrement depth
  104. if self._seen_begin:
  105. self._begin_depth = max(0, self._begin_depth - 1)
  106. self._seen_begin = False
  107. # Split on semicolon if not inside a BEGIN...END block
  108. if self.level <= 0 and self._begin_depth == 0:
  109. self.consume_ws = True
  110. elif ttype is T.Keyword and value.split()[0] == 'GO':
  111. self.consume_ws = True
  112. elif (ttype not in (T.Whitespace, T.Newline, T.Comment.Single,
  113. T.Comment.Multiline)
  114. and not (ttype is T.Keyword and value.upper() == 'BEGIN')):
  115. # Reset _seen_begin if we see a non-whitespace, non-comment
  116. # token but not for BEGIN itself (which just set the flag)
  117. self._seen_begin = False
  118. # Yield pending statement (if any)
  119. if self.tokens and not all(t.is_whitespace for t in self.tokens):
  120. yield sql.Statement(self.tokens)