Python source examples: pygments.util.text_type()
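In older Pygments releases, pygments.util.text_type is the Python 2/3 compatibility alias for the Unicode string type; the examples below use it either to detect Unicode input or to coerce values to Unicode. A minimal sketch of the alias, assuming the usual six-style definition (modern, Python-3-only Pygments no longer ships it):

import sys

if sys.version_info[0] >= 3:
    text_type = str       # Python 3: all str objects are Unicode
else:
    text_type = unicode   # Python 2: the dedicated Unicode type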
Example 1
from io import BytesIO

from pygments.util import text_type


# from pygments.lexers.special.RawTokenLexer
def get_tokens(self, text):
    if isinstance(text, text_type):
        # raw token stream never has any non-ASCII characters
        text = text.encode('ascii')
    if self.compress == 'gz':
        import gzip
        gzipfile = gzip.GzipFile('', 'rb', 9, BytesIO(text))
        text = gzipfile.read()
    elif self.compress == 'bz2':
        import bz2
        text = bz2.decompress(text)
    # do not call Lexer.get_tokens() because we do not want Unicode
    # decoding to occur, and stripping is not optional.
    text = text.strip(b'\n') + b'\n'
    for i, t, v in self.get_tokens_unprocessed(text):
        yield t, v
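For context: this get_tokens() comes from Pygments' RawTokenLexer (pygments.lexers.special), which re-parses the raw token streams written by RawTokenFormatter. A minimal round-trip sketch, assuming a standard Pygments install (the sample source string is arbitrary):

from pygments import highlight
from pygments.formatters import RawTokenFormatter
from pygments.lexers import PythonLexer
from pygments.lexers.special import RawTokenLexer

# format some Python source as a raw token stream (bytes), then re-lex it
raw = highlight(u'def f(): pass\n', PythonLexer(), RawTokenFormatter())
for token, value in RawTokenLexer().get_tokens(raw):
    print(token, repr(value))

Passing compress='gz' (or 'bz2') to both RawTokenFormatter and RawTokenLexer exercises the decompression branches shown above.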
Example 2
from pygments.util import text_type


# from pygments.lexers.robotframework.RobotFrameworkLexer;
# RowTokenizer and VariableTokenizer are helper classes defined
# in the same module
def get_tokens_unprocessed(self, text):
    row_tokenizer = RowTokenizer()
    var_tokenizer = VariableTokenizer()
    index = 0
    for row in text.splitlines():
        for value, token in row_tokenizer.tokenize(row):
            for value, token in var_tokenizer.tokenize(value, token):
                if value:
                    yield index, token, text_type(value)
                    index += len(value)
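For context: this get_tokens_unprocessed() comes from Pygments' RobotFrameworkLexer (pygments.lexers.robotframework), where text_type(value) normalizes every tokenized cell to a Unicode string before it is yielded. A minimal usage sketch, assuming a standard Pygments install (the Robot Framework snippet is arbitrary):

from pygments.lexers.robotframework import RobotFrameworkLexer

source = (u'*** Test Cases ***\n'
          u'Example\n'
          u'    Log    ${MESSAGE}\n')
for index, token, value in RobotFrameworkLexer().get_tokens_unprocessed(source):
    # each item is (character offset, token type, Unicode value)
    print(index, token, repr(value))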