You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

84 lines
2.3 KiB

2 weeks ago
<?php
namespace App\Services\Crawl;
/**
 * Cross-process gate that serialises arXiv requests and enforces a minimum
 * spacing between them (arXiv asks for at most one request every 3 seconds
 * over a single connection).
 *
 * The file lock is held only while a caller waits for its turn; the HTTP
 * request itself runs outside the lock so that other processes are not
 * blocked for the duration of a slow response.
 */
class ArxivRequestGate
{
    private const LOCK_BASENAME = 'arxiv_request.lock';

    private const STATE_BASENAME = 'arxiv_last_request_at.txt';

    /**
     * @param float $minIntervalSeconds Minimum number of seconds between two requests.
     */
    public function __construct(
        protected float $minIntervalSeconds = 4.0,
    ) {}

    /**
     * Build a gate using `crawl.arxiv.min_interval_seconds` (default 4).
     */
    public static function fromConfig(): self
    {
        return new self((float) config('crawl.arxiv.min_interval_seconds', 4));
    }

    /**
     * Block until it is this caller's turn to issue an arXiv request.
     *
     * The exclusive lock is held while sleeping out the remaining interval.
     * Before the lock is released the current time is written to the state
     * file, which reserves the slot: a process that acquires the lock while
     * this caller's request is still in flight sees the reservation and waits
     * a full interval instead of firing immediately.
     *
     * @throws \RuntimeException If the state directory, lock file or state file cannot be used.
     */
    public function waitTurn(): void
    {
        $dir = $this->ensureStateDirectory();
        $statePath = $dir.'/'.self::STATE_BASENAME;

        $handle = fopen($dir.'/'.self::LOCK_BASENAME, 'c+');
        if ($handle === false) {
            throw new \RuntimeException('无法创建 arXiv 请求锁文件');
        }

        try {
            if (! flock($handle, LOCK_EX)) {
                throw new \RuntimeException('无法获取 arXiv 请求锁');
            }

            // Clamp at zero: a timestamp from the future (clock adjusted
            // backwards) must never make us sleep longer than one interval
            // while holding the lock.
            $elapsed = max(0.0, microtime(true) - $this->readLastRequestAt($statePath));
            $this->pause($this->minIntervalSeconds - $elapsed);

            // Reserve the slot while the lock is still held.
            $this->writeTimestamp($statePath);
        } finally {
            flock($handle, LOCK_UN);
            fclose($handle);
        }
    }

    /**
     * Record that an arXiv request has just been made; the next waitTurn()
     * measures its interval from this moment.
     *
     * @throws \RuntimeException If the state directory or state file cannot be written.
     */
    public function markSent(): void
    {
        $this->writeTimestamp($this->ensureStateDirectory().'/'.self::STATE_BASENAME);
    }

    /**
     * Wait for a turn, run the callback, then record the request time.
     *
     * The time is recorded even when the callback throws.
     *
     * @template T
     *
     * @param callable(): T $callback
     * @return T
     */
    public function run(callable $callback): mixed
    {
        $this->waitTurn();
        try {
            return $callback();
        } finally {
            $this->markSent();
        }
    }

    /**
     * Return the directory holding the lock and state files, creating it if needed.
     *
     * @throws \RuntimeException If the directory does not exist and cannot be created.
     */
    private function ensureStateDirectory(): string
    {
        $dir = storage_path('framework/cache');

        // Re-check is_dir() after a failed mkdir() so that a concurrent
        // creator is not reported as a failure.
        if (! is_dir($dir) && ! mkdir($dir, 0755, true) && ! is_dir($dir)) {
            throw new \RuntimeException('无法创建 arXiv 请求状态目录');
        }

        return $dir;
    }

    /**
     * Read the last recorded request time; 0.0 when nothing usable is stored.
     */
    private function readLastRequestAt(string $statePath): float
    {
        if (! is_readable($statePath)) {
            return 0.0;
        }

        $handle = fopen($statePath, 'r');
        if ($handle === false) {
            return 0.0;
        }

        try {
            // Shared lock pairs with the LOCK_EX used by writeTimestamp(), so
            // a half-written file is never parsed as "no previous request".
            flock($handle, LOCK_SH);
            $raw = stream_get_contents($handle);
        } finally {
            flock($handle, LOCK_UN);
            fclose($handle);
        }

        return $raw === false ? 0.0 : (float) trim($raw);
    }

    /**
     * Store the current time in the state file under an exclusive lock.
     *
     * @throws \RuntimeException If the file cannot be written.
     */
    private function writeTimestamp(string $statePath): void
    {
        // %.6F keeps microsecond precision and is locale-independent.
        $written = file_put_contents($statePath, sprintf('%.6F', microtime(true)), LOCK_EX);
        if ($written === false) {
            throw new \RuntimeException('无法写入 arXiv 请求时间戳');
        }
    }

    /**
     * Sleep for the given number of seconds; no-op for zero or negative values.
     */
    private function pause(float $seconds): void
    {
        if ($seconds <= 0) {
            return;
        }

        // usleep() may not support more than one second on every platform,
        // so whole seconds go through sleep().
        $micros = (int) ceil($seconds * 1_000_000);
        $wholeSeconds = intdiv($micros, 1_000_000);
        $remainder = $micros % 1_000_000;

        if ($wholeSeconds > 0) {
            sleep($wholeSeconds);
        }
        if ($remainder > 0) {
            usleep($remainder);
        }
    }
}