Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et: 

 

# Copyright 2014-2018 Florian Bruhin (The Compiler) <mail@qutebrowser.org> 

# 

# This file is part of qutebrowser. 

# 

# qutebrowser is free software: you can redistribute it and/or modify 

# it under the terms of the GNU General Public License as published by 

# the Free Software Foundation, either version 3 of the License, or 

# (at your option) any later version. 

# 

# qutebrowser is distributed in the hope that it will be useful, 

# but WITHOUT ANY WARRANTY; without even the implied warranty of 

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

# GNU General Public License for more details. 

# 

# You should have received a copy of the GNU General Public License 

# along with qutebrowser. If not, see <http://www.gnu.org/licenses/>. 

 

"""Our own fork of shlex.split with some added and removed features.""" 

 

import re 

 

from qutebrowser.utils import log, utils 

 

 

class ShellLexer:

    """Tokenizer for simple shell-like command lines.

    Derived from Python's shlex, with some features stripped and some
    qutebrowser-specific ones added.

    Attributes:
        string: The input string which gets tokenized.
        whitespace: The characters which separate tokens.
        quotes: The characters which open and close a quoted section.
        escape: The characters which escape the character after them.
        escapedquotes: The quote characters inside which escaping is active.
        keep: If set, quote and escape characters are kept in the tokens and
              separating whitespace is yielded as well.
        quoted: Whether the current token contained a quoted section.
        escapedstate: The state to go back to after an escaped character.
        token: The token which has been collected so far.
        state: The current state of the state machine: ' ' between tokens,
               'a' inside a word, a quote character inside a quoted section,
               or the escape character right after an escape.
    """

    def __init__(self, s):
        self.string = s
        # Lexer configuration.
        self.whitespace = ' \t\r'
        self.quotes = '\'"'
        self.escape = '\\'
        self.escapedquotes = '"'
        self.keep = False
        # State machine attributes, set to their real defaults by reset().
        self.quoted = None
        self.escapedstate = None
        self.token = None
        self.state = None
        self.reset()

    def reset(self):
        """Put the state machine back into its initial state."""
        self.state = ' '
        self.escapedstate = ' '
        self.token = ''
        self.quoted = False

    def __iter__(self):  # noqa: C901 pragma: no mccabe
        """Tokenize the input string, yielding one token at a time."""
        self.reset()
        for char in self.string:
            if self.state == ' ':
                # Between two tokens.
                if self.keep:
                    self.token += char
                if char in self.whitespace:
                    if self.token or self.quoted:
                        yield self.token
                        self.reset()
                elif char in self.escape:
                    self.escapedstate = 'a'
                    self.state = char
                elif char in self.quotes:
                    self.state = char
                else:
                    self.token = char
                    self.state = 'a'
            elif self.state in self.quotes:
                # Inside a quoted section, self.state is the opening quote.
                self.quoted = True
                if char == self.state:
                    # Closing quote, continue as a normal word.
                    if self.keep:
                        self.token += char
                    self.state = 'a'
                elif (char in self.escape and
                      self.state in self.escapedquotes):
                    if self.keep:
                        self.token += char
                    self.escapedstate = self.state
                    self.state = char
                else:
                    self.token += char
            elif self.state in self.escape:
                # Directly after an escape character, which is self.state.
                # In posix shells, only the quote itself or the escape
                # character may be escaped within quotes - for everything
                # else, the escape character is taken literally.
                escapable = char in (self.state, self.escapedstate)
                if (self.escapedstate in self.quotes and
                        not escapable and not self.keep):
                    self.token += self.state
                self.token += char
                self.state = self.escapedstate
            elif self.state == 'a':
                # Inside an unquoted word.
                if char in self.whitespace:
                    self.state = ' '
                    assert self.token or self.quoted
                    yield self.token
                    self.reset()
                    if self.keep:
                        yield char
                elif char in self.quotes:
                    if self.keep:
                        self.token += char
                    self.state = char
                elif char in self.escape:
                    if self.keep:
                        self.token += char
                    self.escapedstate = 'a'
                    self.state = char
                else:
                    self.token += char
            else:
                raise utils.Unreachable(
                    "Invalid state {!r}!".format(self.state))

        # The input ended right after an escape character: take it literally
        # (with keep set, it already is part of the token).
        if self.state in self.escape and not self.keep:
            self.token += self.state
        # Emit what's left, including an empty but quoted token.
        if self.token or self.quoted:
            yield self.token

 

 

def split(s, keep=False):
    """Split a string into a list of tokens using ShellLexer.

    Tokens consisting only of whitespace are not returned on their own, but
    prepended to the token following them. Whitespace left over at the end
    is returned as a final element.

    Args:
        s: The string to split.
        keep: Whether to keep special chars in the split output.

    Return:
        A list of strings.
    """
    lexer = ShellLexer(s)
    lexer.keep = keep
    tokens = list(lexer)
    if not tokens:
        return []

    log.shlexer.vdebug("{!r} -> {!r}".format(s, tokens))

    result = []
    pending = []  # whitespace tokens waiting for the next real token
    for tok in tokens:
        # NOTE(review): this check is done regardless of keep, so a quoted
        # or escaped whitespace-only argument is also merged into the next
        # token when keep is False - confirm this is intended.
        if tok.isspace():
            pending.append(tok)
            continue
        result.append(''.join(pending) + tok)
        pending = []
    if pending:
        result.append(''.join(pending))

    return result

 

 

def _combine_ws(parts, whitespace): 

"""Combine whitespace in a list with the element following it. 

 

Args: 

parts: A list of strings. 

whitespace: A string containing what's considered whitespace. 

 

Return: 

The modified list. 

""" 

out = [] 

ws = '' 

for part in parts: 

if not part: 

continue 

elif part in whitespace: 

ws += part 

else: 

out.append(ws + part) 

ws = '' 

if ws: 

out.append(ws) 

return out 

 

 

def simple_split(s, keep=False, maxsplit=None):
    """Split a string on whitespace, optionally keeping the whitespace.

    Newlines, tabs and spaces are treated as whitespace. Every single
    whitespace character counts as a split point.

    Args:
        s: The string to split.
        keep: Whether to keep whitespace. If set, the whitespace is
              prepended to the part following it.
        maxsplit: The maximum count of splits. None means no limit, 0 means
                  the string is not split at all.

    Return:
        A list of split strings. Without keep, empty parts are dropped and
        trailing whitespace is stripped from the last part.
    """
    whitespace = '\n\t '
    if maxsplit == 0:
        # re.split with maxsplit=0 splits everything, while str.split splits
        # nothing (which is the behavior we want).
        if keep:
            return [s]
        return [s.strip(whitespace)]
    if maxsplit is None:
        # For re.split, 0 means "no limit".
        maxsplit = 0

    # maxsplit is passed by keyword below, as passing it positionally to
    # re.split is deprecated since Python 3.13.
    if keep:
        # The capturing group makes re.split return the separators as well.
        pattern = '([' + whitespace + '])'
        parts = re.split(pattern, s, maxsplit=maxsplit)
        return _combine_ws(parts, whitespace)

    pattern = '[' + whitespace + ']'
    parts = re.split(pattern, s, maxsplit=maxsplit)
    # With a split limit, the last part can still have trailing whitespace.
    parts[-1] = parts[-1].rstrip()
    return [p for p in parts if p]