dev-python/scrapy: backport the lifting of a dependency restriction

dev-python/twisted::gentoo has surpassed the maximum version allowed by upstream.
Enable more tests.
Fix DEPEND/BDEPEND confusion.
Update IUSE and RESTRICT to account for the test USE flag.
Remove superfluous ${PYTHON_DEPS}, as it's already inherited.

Signed-off-by: Lucio Sauer <watermanpaint@posteo.net>
This commit is contained in:
Lucio Sauer 2023-12-30 21:02:09 +01:00
parent 6e2a54bf38
commit 8dbc037bbf
No known key found for this signature in database
GPG Key ID: 5568A2DDA60CA297
2 changed files with 160 additions and 0 deletions

View File

@ -0,0 +1,99 @@
Backport commits 720f351a3eea5e5bfa83a6eaf50210cd1fa43992^..0630e4aaa10c3fb8c79c2542a229f5c0632cddde.
At the time of writing, ::gentoo no longer has <dev-python/twisted-23.8.0.
These commits refactor the signal handling logic to support newer versions of Twisted.
Author: Lucio Sauer <watermanpaint@posteo.net>
--- a/scrapy/crawler.py
+++ b/scrapy/crawler.py
@@ -404,8 +404,8 @@ class CrawlerProcess(CrawlerRunner):
:param bool stop_after_crawl: stop or not the reactor when all
crawlers have finished
- :param bool install_signal_handlers: whether to install the shutdown
- handlers (default: True)
+ :param bool install_signal_handlers: whether to install the OS signal
+ handlers from Twisted and Scrapy (default: True)
"""
from twisted.internet import reactor
@@ -416,15 +416,17 @@ class CrawlerProcess(CrawlerRunner):
return
d.addBoth(self._stop_reactor)
- if install_signal_handlers:
- install_shutdown_handlers(self._signal_shutdown)
resolver_class = load_object(self.settings["DNS_RESOLVER"])
resolver = create_instance(resolver_class, self.settings, self, reactor=reactor)
resolver.install_on_reactor()
tp = reactor.getThreadPool()
tp.adjustPoolsize(maxthreads=self.settings.getint("REACTOR_THREADPOOL_MAXSIZE"))
reactor.addSystemEventTrigger("before", "shutdown", self.stop)
- reactor.run(installSignalHandlers=False) # blocking call
+ if install_signal_handlers:
+ reactor.addSystemEventTrigger(
+ "after", "startup", install_shutdown_handlers, self._signal_shutdown
+ )
+ reactor.run(installSignalHandlers=install_signal_handlers) # blocking call
def _graceful_stop_reactor(self) -> Deferred:
d = self.stop()
--- a/scrapy/utils/ossignal.py
+++ b/scrapy/utils/ossignal.py
@@ -19,13 +19,10 @@ def install_shutdown_handlers(
function: SignalHandlerT, override_sigint: bool = True
) -> None:
"""Install the given function as a signal handler for all common shutdown
- signals (such as SIGINT, SIGTERM, etc). If override_sigint is ``False`` the
- SIGINT handler won't be install if there is already a handler in place
- (e.g. Pdb)
+ signals (such as SIGINT, SIGTERM, etc). If ``override_sigint`` is ``False`` the
+ SIGINT handler won't be installed if there is already a handler in place
+ (e.g. Pdb)
"""
- from twisted.internet import reactor
-
- reactor._handleSignals()
signal.signal(signal.SIGTERM, function)
if signal.getsignal(signal.SIGINT) == signal.default_int_handler or override_sigint:
signal.signal(signal.SIGINT, function)
--- a/scrapy/utils/testproc.py
+++ b/scrapy/utils/testproc.py
@@ -2,7 +2,7 @@ from __future__ import annotations
import os
import sys
-from typing import Iterable, Optional, Tuple, cast
+from typing import Iterable, List, Optional, Tuple, cast
from twisted.internet.defer import Deferred
from twisted.internet.error import ProcessTerminated
@@ -26,14 +26,15 @@ class ProcessTest:
env = os.environ.copy()
if settings is not None:
env["SCRAPY_SETTINGS_MODULE"] = settings
+ assert self.command
cmd = self.prefix + [self.command] + list(args)
pp = TestProcessProtocol()
- pp.deferred.addBoth(self._process_finished, cmd, check_code)
+ pp.deferred.addCallback(self._process_finished, cmd, check_code)
reactor.spawnProcess(pp, cmd[0], cmd, env=env, path=self.cwd)
return pp.deferred
def _process_finished(
- self, pp: TestProcessProtocol, cmd: str, check_code: bool
+ self, pp: TestProcessProtocol, cmd: List[str], check_code: bool
) -> Tuple[int, bytes, bytes]:
if pp.exitcode and check_code:
msg = f"process {cmd} exit with code {pp.exitcode}"
--- a/setup.py
+++ b/setup.py
@@ -6,8 +6,7 @@ version = (Path(__file__).parent / "scrapy/VERSION").read_text("ascii").strip()
install_requires = [
- # 23.8.0 incompatibility: https://github.com/scrapy/scrapy/issues/6024
- "Twisted>=18.9.0,<23.8.0",
+ "Twisted>=18.9.0",
"cryptography>=36.0.0",
"cssselect>=0.9.1",
"itemloaders>=1.0.1",

View File

@ -0,0 +1,61 @@
# Copyright 1999-2023 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
EAPI=8
# Build for CPython 3.10 through 3.12.
PYTHON_COMPAT=( python3_{10..12} )
# Upstream ships a setuptools-based PEP 517 build backend.
DISTUTILS_USE_PEP517=setuptools
inherit distutils-r1
DESCRIPTION="A high-level Web Crawling and Web Scraping framework"
HOMEPAGE="https://scrapy.org/"
# Fetch the tagged release tarball from GitHub, renamed to ${P}.tar.gz.
SRC_URI="https://github.com/scrapy/scrapy/archive/refs/tags/${PV}.tar.gz -> ${P}.tar.gz"
LICENSE="BSD"
SLOT=0
KEYWORDS="~amd64"
# NOTE(review): distutils_enable_tests below also adds IUSE="test" and the
# matching RESTRICT, so these explicit declarations appear redundant — confirm
# against the eclass version in use before dropping them.
IUSE="test"
RESTRICT="!test? ( test )"
# The 'PyDispatcher>=2.0.5' distribution was not found and is required by Scrapy
# https://bugs.gentoo.org/684734
RDEPEND="dev-python/cssselect[${PYTHON_USEDEP}]
dev-python/cryptography[${PYTHON_USEDEP}]
<dev-python/priority-2.0.0[${PYTHON_USEDEP}]
dev-python/h2[${PYTHON_USEDEP}]
dev-python/itemadapter[${PYTHON_USEDEP}]
dev-python/itemloaders[${PYTHON_USEDEP}]
dev-python/lxml[${PYTHON_USEDEP}]
dev-python/parsel[${PYTHON_USEDEP}]
dev-python/protego[${PYTHON_USEDEP}]
>=dev-python/pydispatcher-2.0.5[${PYTHON_USEDEP}]
dev-python/pyopenssl[${PYTHON_USEDEP}]
dev-python/queuelib[${PYTHON_USEDEP}]
dev-python/service-identity[${PYTHON_USEDEP}]
dev-python/six[${PYTHON_USEDEP}]
dev-python/tldextract[${PYTHON_USEDEP}]
>=dev-python/twisted-18.9.0[${PYTHON_USEDEP}]
dev-python/w3lib[${PYTHON_USEDEP}]
dev-python/zope-interface[${PYTHON_USEDEP}]
"
# Test-only build-time dependencies: the full runtime dep set plus extra
# fixtures/event-loop packages the test suite imports.
BDEPEND="
	test? (
		${RDEPEND}
		dev-python/testfixtures[${PYTHON_USEDEP}]
		dev-python/uvloop[${PYTHON_USEDEP}]
	)
"
# Backport of upstream signal-handling refactoring that lifts the
# <twisted-23.8.0 restriction (see the patch header for the commit range).
PATCHES="${FILESDIR}"/${P}-lift-twisted-restriction.patch
# Wire up src_test via pytest (also appends the test IUSE/RESTRICT/BDEPEND).
distutils_enable_tests pytest
EPYTEST_DESELECT=(
	# these require (local) network access
	tests/test_command_check.py
	tests/test_feedexport.py
	tests/test_pipeline_files.py::TestFTPFileStore::test_persist
	# Flaky test: https://github.com/scrapy/scrapy/issues/6193
	tests/test_crawl.py::CrawlTestCase::test_start_requests_laziness
)
# The docs tree contains doctests/examples that are not part of the test suite.
EPYTEST_IGNORE=( docs )