Coverage for qutebrowser/utils/urlutils.py : 100%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
# Copyright 2014-2018 Florian Bruhin (The Compiler) <mail@qutebrowser.org> # # This file is part of qutebrowser. # # qutebrowser is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # qutebrowser is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with qutebrowser. If not, see <http://www.gnu.org/licenses/>.
# FIXME: we probably could raise some exceptions on invalid URLs # https://github.com/qutebrowser/qutebrowser/issues/108
"""Error raised if a function got an invalid URL.
Inherits ValueError because that was the exception originally used for that, so there still might be some code around which checks for that. """
"""Get a search engine name and search term from a string.
Args: s: The string to get a search engine for.
Return: A (engine, term) tuple, where engine is None for the default engine. """
else: else:
"""Get a search engine URL for a text.
Args: txt: Text to search for.
Return: The search URL as a QUrl. """
"""Naive check if given URL is really a URL.
Args: urlstr: The URL to check for, as string.
Return: True if the URL really is a URL, False otherwise. """
# Valid IPv4/IPv6 address
# Qt treats things like "23.42" or "1337" or "0xDEAD" as valid URLs # which we don't want. Note we already filtered *real* valid IPs # above.
"""Check if a URL is really a URL via DNS.
Args: url: The URL to check for as a string.
Return: True if the URL really is a URL, False otherwise. """
not QHostAddress(urlstr).isNull()): # Qt treats things like "23.42" or "1337" or "0xDEAD" as valid URLs # which we don't want.
force_search=False): """Get a QUrl based on a user input which is URL or search term.
Args: urlstr: URL to load as a string. cwd: The current working directory, or None. relative: Whether to resolve relative files. do_search: Whether to perform a search on non-URLs. force_search: Whether to force a search even if the content can be interpreted as a URL or a path.
Return: A target QUrl to a search page or the original URL. """ check_exists=True)
# probably a search term else: # probably an address urlstr, url.toDisplayString())) else:
"""Check if a url has an explicit scheme given.
Args: url: The URL as QUrl. """ # Note that generic URI syntax actually would allow a second colon # after the scheme delimiter. Since we don't know of any URIs # using this and want to support e.g. searching for scoped C++ # symbols, we treat this as not a URI anyways. (url.host() or url.path()) and ' ' not in url.path() and not url.path().startswith(':'))
"""Return True if url is an about:... or other special URL.
Args: url: The URL as QUrl. """
"""Check if url seems to be a valid URL.
Args: urlstr: The URL as string.
Return: True if it is a valid URL, False otherwise. """
urlstr, autosearch))
# no autosearch, so everything is a URL unless it has an explicit # search engine. else:
# This will also catch URLs containing spaces.
# URLs with explicit schemes are always URLs # Special URLs are always URLs, even with autosearch=never # We want to use qurl_from_user_input here, as the user might enter # "foo.de" and that should be treated as URL here. else: # pragma: no cover raise ValueError("Invalid autosearch value")
"""Get a QUrl based on a user input. Additionally handles IPv6 addresses.
QUrl.fromUserInput handles something like '::1' as a file URL instead of an IPv6, so we first try to handle it as a valid IPv6, and if that fails we use QUrl.fromUserInput.
WORKAROUND - https://bugreports.qt.io/browse/QTBUG-41089 FIXME - Maybe https://codereview.qt-project.org/#/c/93851/ has a better way to solve this? https://github.com/qutebrowser/qutebrowser/issues/109
Args: urlstr: The URL as string.
Return: The converted QUrl. """ # First we try very liberally to separate something like an IPv6 from the # rest (e.g. path info or parameters) else: # Then we try to parse it as an IPv6, and if we fail use # QUrl.fromUserInput. else:
"""Display an error message for a URL.
Args: action: The action which was interrupted by the error. """ url.toDisplayString())) url, "Trying to {} with invalid URL".format(action))
"""Check if the given QUrl is invalid, and if so, raise a CommandError."""
"""Check if path is a valid path.
Args: pathstr: The path as string. cwd: The current working directory, or None. relative: Whether to resolve relative files. check_exists: Whether to check if the file actually exists on the filesystem.
Return: The path if it is a valid path, None otherwise. """
else:
else: "URL contains characters which are not present in the " "current locale")
"""Get a suitable filename from a URL.
Args: url: The URL to parse, as a QUrl.
Return: The suggested filename as a string, or None. """ else:
"""Get a (scheme, host, port) tuple from a QUrl.
This is suitable to identify a connection, e.g. for SSL errors. """ url.toDisplayString())) 'http': 80, 'https': 443, 'ftp': 21, } url.toDisplayString()))
"""Get an error string for a URL.
Args: url: The URL as a QUrl. base: The base error string.
Return: A new string with url.errorString() appended if available. """ else:
"""Check if url1 and url2 belong to the same website.
This will use a "public suffix list" to determine what a "top level domain" is. All further domains are ignored.
For example, example.com and www.example.com are considered the same, but example.co.uk and test.co.uk are not.
Return: True if the domains are the same, False otherwise. """
"""Return the fully encoded url as string.
Args: url: The url to encode as QUrl. """
"""Exception raised by incdec_number on problems.
Attributes: msg: The error message. url: The QUrl which caused the error. """
"""Get an incremented/decremented URL based on a URL match.""" # This should always succeed because we match \d+ else:
"""Find a number in the url and increment or decrement it.
Args: url: The current url incdec: Either 'increment' or 'decrement' count: The number to increment or decrement by segments: A set of URL segments to search. Valid segments are: 'host', 'port', 'path', 'query', 'anchor'. Default: {'path', 'query'}
Return: The new url with the number incremented/decremented.
Raises IncDecError if the url contains no number. """
', '.join(extra_elements)), url)
# Make a copy of the QUrl so we don't modify the original # Order as they appear in a URL ('host', url.host, url.setHost), ('port', lambda: str(url.port()) if url.port() > 0 else '', lambda x: url.setPort(int(x))), ('path', url.path, url.setPath), ('query', url.query, url.setQuery), ('anchor', url.fragment, url.setFragment), ] # We're searching the last number so we walk the url segments backwards
# Get the last number in a string
"""Return a file:// url (as string) to the given local path.
Arguments: path: The absolute path to the local file """
"""Get a data: QUrl for the given data."""
"""Get an IDN-homograph phishing-safe form of the given QUrl.
If we're dealing with a Punycode-encoded URL, this prepends the hostname in its encoded form, to make sure those URLs are distinguishable.
See https://github.com/qutebrowser/qutebrowser/issues/2547 and https://bugreports.qt.io/browse/QTBUG-60365 """
if '..' in host: # pragma: no cover # WORKAROUND for https://bugreports.qt.io/browse/QTBUG-60364 return '(unparseable URL!) {}'.format(qurl.toDisplayString())
"""Get a query string for the given URL.
This is a WORKAROUND for: https://www.riverbankcomputing.com/pipermail/pyqt/2017-November/039702.html """ except AttributeError: # pragma: no cover return QUrlQuery(qurl).query()
"""Error raised when proxy_from_url gets an unknown proxy type."""
"""Create a QNetworkProxy from QUrl and a proxy type.
Args: url: URL of a proxy (possibly with credentials).
Return: New QNetworkProxy. """
'http': QNetworkProxy.HttpProxy, 'socks': QNetworkProxy.Socks5Proxy, 'socks5': QNetworkProxy.Socks5Proxy, 'direct': QNetworkProxy.NoProxy, }
|