includes/archive.php
<?php
declare(strict_types=1);
require_once __DIR__ . '/core.php';
/*
|--------------------------------------------------------------------------
| Archive library + endpoint
|--------------------------------------------------------------------------
|
| Included from the dashboard:
| Provides archive readers.
|
| Called directly:
| Creates, verifies and purges yearly archives.
|
| Archives are stored per Brivacia site, for example:
| /archives/2025-breat.json
| /archives/2025-code.json
|
| This prevents a replace/import for one site from overwriting archived data
| belonging to another site.
|
*/
// Endpoint dispatch: when this file is the script that was requested directly
// (see brivaciaArchiveIsEndpoint()), load the translations and run the yearly
// archive endpoint. When the file is merely included, nothing runs here and
// only the functions below are made available.
if (brivaciaArchiveIsEndpoint()) {
loadTranslations();
runYearArchiveEndpoint();
}
/*
|--------------------------------------------------------------------------
| Archive readers
|--------------------------------------------------------------------------
*/
/**
 * Tells whether this file is the script that was requested directly.
 *
 * The resolved path of $_SERVER['SCRIPT_FILENAME'] (an empty string is used
 * when the entry is missing) is compared with the resolved path of this file.
 *
 * @return bool True when both resolved paths are strictly identical.
 */
function brivaciaArchiveIsEndpoint(): bool {
    $entryScript = realpath($_SERVER['SCRIPT_FILENAME'] ?? '');
    $thisFile = realpath(__FILE__);

    return $entryScript === $thisFile;
}
/**
 * Turns a site identifier into a token that is safe to embed in a file name.
 *
 * The value is trimmed and lower-cased, every run of characters outside
 * [a-z0-9_-] becomes a single dash, and leading/trailing dashes and
 * underscores are removed.
 *
 * @param string $site Raw site identifier.
 * @return string The normalised identifier, or 'site' when nothing is left.
 */
function archiveSafeSiteId(string $site): string {
    $lowered = strtolower(trim($site));

    $slug = preg_replace('/[^a-z0-9_-]+/', '-', $lowered);
    if ($slug === null) {
        // preg_replace() reports an internal error with null.
        $slug = '';
    }

    $slug = trim($slug, '-_');
    if ($slug === '') {
        return 'site';
    }

    return $slug;
}
/**
 * Builds the base name of a per-site yearly archive, e.g. "2025-breat.json".
 *
 * @param int    $year Archive year, zero-padded to at least four digits.
 * @param string $site Site identifier; normalised with archiveSafeSiteId().
 * @return string File name without any directory part.
 */
function archiveFileName(int $year, string $site): string {
    $siteId = archiveSafeSiteId($site);

    return sprintf('%04d-%s.json', $year, $siteId);
}
/**
 * Extracts the year from an archive file name.
 *
 * Both "YYYY-site.json" and the legacy "YYYY.json" forms are recognised;
 * only the base name of the path is inspected.
 *
 * @param string $file Archive path or file name.
 * @return int The four-digit year, or 0 when the name does not match.
 */
function archiveFileYear(string $file): int {
    $name = basename($file);
    $parts = [];

    if (preg_match('/^(\d{4})(?:-[a-z0-9_-]+)?\.json$/i', $name, $parts) !== 1) {
        return 0;
    }

    return (int)$parts[1];
}
/**
 * Extracts the site token from a per-site archive file name.
 *
 * Only the "YYYY-site.json" form carries a site; a legacy "YYYY.json" name
 * yields an empty string. The token is returned exactly as it appears in the
 * file name.
 *
 * @param string $file Archive path or file name.
 * @return string The site token, or '' when the name has none.
 */
function archiveFileSite(string $file): string {
    $name = basename($file);
    $parts = [];

    if (preg_match('/^\d{4}-([a-z0-9_-]+)\.json$/i', $name, $parts) !== 1) {
        return '';
    }

    return (string)$parts[1];
}
/**
 * Lists the yearly archive files found in the archive directory.
 *
 * Per-site archives (2025-site.json) and legacy archives (2025.json) are
 * kept; anything else matched by the glob, such as summary.json, is ignored.
 *
 * @return array Matching paths in natural, case-insensitive order.
 */
function brivaciaArchiveFiles(): array {
    $candidates = glob(archiveDir() . '/*.json');
    if (!$candidates) {
        // glob() yields false on error and may yield an empty list.
        $candidates = [];
    }

    $archives = [];
    foreach ($candidates as $path) {
        if (preg_match('/^\d{4}(?:-[a-z0-9_-]+)?\.json$/i', basename($path)) === 1) {
            $archives[] = $path;
        }
    }

    sort($archives, SORT_NATURAL | SORT_FLAG_CASE);

    return $archives;
}
/**
 * Returns the path of the cached all-time summary inside the archive directory.
 *
 * @return string Path ending in "/summary.json".
 */
function brivaciaArchiveSummaryFile(): string {
    $directory = archiveDir();

    return $directory . '/summary.json';
}
/**
 * Returns the merged content of every archive file belonging to one year.
 *
 * Rows of the days/pages/countries/referrers sections are concatenated in
 * file order. 'generated_at' ends up holding the last non-empty value met.
 * The result is memoised per year for the rest of the request.
 *
 * @param int $year Year to load.
 * @return array Keys: year, generated_at, days, pages, countries, referrers.
 */
function readYearArchive(int $year): array {
    static $cache = [];

    if (!isset($cache[$year])) {
        $result = [
            'year' => $year,
            'generated_at' => '',
            'days' => [],
            'pages' => [],
            'countries' => [],
            'referrers' => [],
        ];
        $sections = ['days', 'pages', 'countries', 'referrers'];

        foreach (brivaciaArchiveFiles() as $path) {
            if (archiveFileYear($path) === $year) {
                $data = readArchiveFile($path);

                foreach ($sections as $name) {
                    foreach (($data[$name] ?? []) as $entry) {
                        $result[$name][] = $entry;
                    }
                }

                $stamp = $data['generated_at'] ?? '';
                if ($stamp !== '') {
                    $result['generated_at'] = (string)$stamp;
                }
            }
        }

        $cache[$year] = $result;
    }

    return $cache[$year];
}
/**
 * Reads and decodes one archive JSON file, memoised per path.
 *
 * The memo is keyed by path and validated against the file's modification
 * time and size, so an archive that is rewritten later in the same request
 * (createYearArchive() followed by rebuildArchiveSummary()) is read again
 * instead of being served from a stale cache entry.
 *
 * @param string $file Path of the archive file.
 * @return array Decoded content, or [] when the file is missing, unreadable
 *               or does not contain a JSON array/object.
 */
function readArchiveFile(string $file): array {
    static $cache = [];

    // PHP caches stat results; drop them so a rewrite of $file is noticed.
    clearstatcache(true, $file);

    if (!is_file($file)) {
        unset($cache[$file]);
        return [];
    }

    $signature = filemtime($file) . ':' . filesize($file);
    if (isset($cache[$file]) && $cache[$file]['signature'] === $signature) {
        return $cache[$file]['data'];
    }

    $raw = file_get_contents($file);
    $json = $raw === false ? null : json_decode($raw, true);
    $data = is_array($json) ? $json : [];

    $cache[$file] = [
        'signature' => $signature,
        'data' => $data,
    ];

    return $data;
}
/**
 * Tells whether a calendar year intersects a date range.
 *
 * Dates are compared as "YYYY-MM-DD" strings. Each bound is optional and is
 * applied on its own: a null $start means "no lower bound", a null $end means
 * "no upper bound". With both bounds null every year overlaps.
 *
 * @param int         $year  Calendar year to test.
 * @param string|null $start Inclusive first day of the range, or null.
 * @param string|null $end   Inclusive last day of the range, or null.
 * @return bool True when at least one day of $year lies inside the range.
 */
function archiveYearOverlaps(int $year, ?string $start, ?string $end): bool {
    // Zero-pad like the rest of the file so string comparison stays valid.
    $yearStart = sprintf('%04d-01-01', $year);
    $yearEnd = sprintf('%04d-12-31', $year);

    if ($start !== null && $yearEnd < $start) {
        return false;
    }

    if ($end !== null && $yearStart > $end) {
        return false;
    }

    return true;
}
/**
 * Collects the rows of one section from every archive file in a date range.
 *
 * Each bound is optional and applied independently, so open-ended ranges
 * (only $start or only $end) behave as expected. Previously a null $end
 * combined with a $start rejected every row, and a null $start disabled
 * filtering entirely. As soon as at least one bound is given, rows without a
 * 'day' value are left out.
 *
 * @param string      $section Archive section: days, pages, countries, referrers.
 * @param string|null $start   Inclusive first day (YYYY-MM-DD), or null.
 * @param string|null $end     Inclusive last day (YYYY-MM-DD), or null.
 * @return array Matching rows in file order.
 */
function archiveRows(string $section, ?string $start = null, ?string $end = null): array {
    $rows = [];
    $filterByDay = $start !== null || $end !== null;

    foreach (brivaciaArchiveFiles() as $file) {
        $year = archiveFileYear($file);
        if ($year <= 0 || !archiveYearOverlaps($year, $start, $end)) {
            continue;
        }

        $sectionRows = readArchiveFile($file)[$section] ?? [];
        if (!is_array($sectionRows)) {
            // Malformed archive: the section is not a list of rows.
            continue;
        }

        foreach ($sectionRows as $row) {
            if ($filterByDay) {
                $day = (string)($row['day'] ?? '');
                if (
                    $day === '' ||
                    ($start !== null && $day < $start) ||
                    ($end !== null && $day > $end)
                ) {
                    continue;
                }
            }

            $rows[] = $row;
        }
    }

    return $rows;
}
/**
 * Sums the archived hit counters over a date range.
 *
 * Without any bound the figures come from the cached summary (its 'hits'
 * entry is merged over zeroed defaults); otherwise the 'days' rows of the
 * range are added up.
 *
 * @param string|null $start Inclusive first day, or null.
 * @param string|null $end   Inclusive last day, or null.
 * @return array Counters: unique_visitors, visits, pageviews, bots.
 */
function archiveHitsTotals(?string $start = null, ?string $end = null): array {
    $counters = [
        'unique_visitors' => 0,
        'visits' => 0,
        'pageviews' => 0,
        'bots' => 0,
    ];

    $unbounded = $start === null && $end === null;
    if ($unbounded) {
        $cached = archiveSummary()['hits'] ?? [];
        return array_merge($counters, $cached);
    }

    $metrics = array_keys($counters);
    foreach (archiveRows('days', $start, $end) as $entry) {
        foreach ($metrics as $metric) {
            $counters[$metric] += (int)($entry[$metric] ?? 0);
        }
    }

    return $counters;
}
/**
 * Totals archived views per distinct value of one column.
 *
 * For an unbounded request on 'countries' or 'referrers' the pre-computed
 * list from the summary is returned as is. Otherwise rows of the section are
 * grouped by $keyName (rows with an empty key are skipped) and ordered by
 * views, highest first.
 *
 * @param string      $section Archive section to read.
 * @param string      $keyName Column to group by, e.g. 'country'.
 * @param string|null $start   Inclusive first day, or null.
 * @param string|null $end     Inclusive last day, or null.
 * @return array List of [$keyName => value, 'views' => total].
 */
function archiveGroupedViews(string $section, string $keyName, ?string $start = null, ?string $end = null): array {
    $wantsEverything = $start === null && $end === null;
    if ($wantsEverything && in_array($section, ['countries', 'referrers'], true)) {
        return archiveSummary()[$section] ?? [];
    }

    $byKey = [];
    foreach (archiveRows($section, $start, $end) as $entry) {
        $label = (string)($entry[$keyName] ?? '');
        if ($label !== '') {
            if (!isset($byKey[$label])) {
                $byKey[$label] = [
                    $keyName => $label,
                    'views' => 0,
                ];
            }
            $byKey[$label]['views'] += (int)($entry['views'] ?? 0);
        }
    }

    $list = array_values($byKey);
    usort($list, static function ($left, $right): int {
        return $right['views'] <=> $left['views'];
    });

    return $list;
}
/**
 * Aggregates archived page rows per (site, page_key) pair.
 *
 * Views are summed. Title and URL are taken from a row when that row is
 * flagged as resolved (page_resolved = 1) or while the entry has no title
 * yet; empty titles/URLs never overwrite existing ones. The result is ordered
 * by views (highest first), then by title.
 *
 * @param string|null $start Inclusive first day, or null.
 * @param string|null $end   Inclusive last day, or null.
 * @return array List of rows: site, page_key, title, url, views, page_resolved.
 */
function archivePages(?string $start = null, ?string $end = null): array {
    $index = [];

    foreach (archiveRows('pages', $start, $end) as $record) {
        $site = (string)($record['site'] ?? '');
        $pageKey = (string)($record['page_key'] ?? '');
        if ($site === '' || $pageKey === '') {
            continue;
        }

        // NUL separator keeps the composite key unambiguous.
        $id = $site . "\0" . $pageKey;
        $entry = $index[$id] ?? [
            'site' => $site,
            'page_key' => $pageKey,
            'title' => '',
            'url' => '',
            'views' => 0,
            'page_resolved' => 0,
        ];

        $entry['views'] += (int)($record['views'] ?? 0);

        $resolved = (int)($record['page_resolved'] ?? 0);
        $mayRelabel = $resolved === 1 || $entry['title'] === '';
        if ($mayRelabel) {
            $title = (string)($record['title'] ?? '');
            $url = (string)($record['url'] ?? '');

            if ($title !== '') {
                $entry['title'] = $title;
            }
            if ($url !== '') {
                $entry['url'] = $url;
            }
            if ($resolved > $entry['page_resolved']) {
                $entry['page_resolved'] = $resolved;
            }
        }

        $index[$id] = $entry;
    }

    $list = array_values($index);
    usort($list, static function ($left, $right): int {
        $byViews = $right['views'] <=> $left['views'];
        if ($byViews !== 0) {
            return $byViews;
        }
        return strcmp((string)$left['title'], (string)$right['title']);
    });

    return $list;
}
/*
|--------------------------------------------------------------------------
| Archive summary cache
|--------------------------------------------------------------------------
|
| The dashboard needs all-time counters on every page. Reading every yearly
| archive for each card is wasteful, so closed-year totals are cached here.
|
| The summary intentionally aggregates every per-site archive, because the
| current dashboard is global. A future site filter can add a site-aware
| summary without changing the archive file format.
|
*/
/**
 * Recomputes the all-time summary from every archive file and stores it.
 *
 * The summary lists the archived years and sites, the summed hit counters and
 * the per-country / per-referrer view totals (highest first). It is written to
 * a temporary file and then renamed over summary.json.
 *
 * The existing summary.json is only replaced when encoding and writing both
 * succeeded; on failure the temporary file is removed, the problem is logged
 * and the freshly computed summary is still returned to the caller.
 *
 * @return array The computed summary.
 */
function rebuildArchiveSummary(): array {
    $summary = [
        'generated_at' => date('c'),
        'years' => [],
        'sites' => [],
        'hits' => [
            'unique_visitors' => 0,
            'visits' => 0,
            'pageviews' => 0,
            'bots' => 0,
        ],
        'countries' => [],
        'referrers' => [],
    ];
    $countries = [];
    $referrers = [];

    foreach (brivaciaArchiveFiles() as $file) {
        $archive = readArchiveFile($file);

        // Prefer the values stored in the archive; fall back to the file name.
        $year = (int)($archive['year'] ?? archiveFileYear($file));
        $site = (string)($archive['site'] ?? archiveFileSite($file));
        if ($year > 0) {
            $summary['years'][] = $year;
        }
        if ($site !== '') {
            $summary['sites'][] = $site;
        }

        foreach (($archive['days'] ?? []) as $row) {
            foreach ($summary['hits'] as $key => $_) {
                $summary['hits'][$key] += (int)($row[$key] ?? 0);
            }
        }

        foreach (($archive['countries'] ?? []) as $row) {
            $country = (string)($row['country'] ?? '');
            if ($country === '') {
                continue;
            }
            $countries[$country] = ($countries[$country] ?? 0) + (int)($row['views'] ?? 0);
        }

        foreach (($archive['referrers'] ?? []) as $row) {
            $referrer = (string)($row['referrer'] ?? '');
            if ($referrer === '') {
                continue;
            }
            $referrers[$referrer] = ($referrers[$referrer] ?? 0) + (int)($row['views'] ?? 0);
        }
    }

    $summary['years'] = array_values(array_unique($summary['years']));
    sort($summary['years'], SORT_NUMERIC);
    $summary['sites'] = array_values(array_unique($summary['sites']));
    sort($summary['sites'], SORT_NATURAL | SORT_FLAG_CASE);

    foreach ($countries as $country => $views) {
        $summary['countries'][] = [
            // Numeric-looking array keys come back as integers.
            'country' => (string)$country,
            'views' => $views,
        ];
    }
    foreach ($referrers as $referrer => $views) {
        $summary['referrers'][] = [
            'referrer' => (string)$referrer,
            'views' => $views,
        ];
    }
    usort($summary['countries'], fn($a, $b) => $b['views'] <=> $a['views']);
    usort($summary['referrers'], fn($a, $b) => $b['views'] <=> $a['views']);

    $file = brivaciaArchiveSummaryFile();
    $tmp = $file . '.tmp';
    $json = json_encode(
        $summary,
        JSON_PRETTY_PRINT |
        JSON_UNESCAPED_UNICODE |
        JSON_UNESCAPED_SLASHES
    );

    // json_encode() returns false on e.g. invalid UTF-8; writing that value
    // would replace a good summary.json with an empty file.
    $written = $json !== false && file_put_contents($tmp, $json, LOCK_EX) !== false;
    if (!$written || !rename($tmp, $file)) {
        if (is_file($tmp)) {
            unlink($tmp);
        }
        brivaciaLog('archive/failed.log', 'summary write failed file=' . basename($file));
    }

    return $summary;
}
/**
 * Returns the all-time summary, memoised for the rest of the request.
 *
 * summary.json is used when it exists and decodes to an array; in every other
 * case the summary is rebuilt from the archive files.
 *
 * @return array The summary structure produced by rebuildArchiveSummary().
 */
function archiveSummary(): array {
    static $summary = null;

    if (!is_array($summary)) {
        $path = brivaciaArchiveSummaryFile();
        $decoded = is_file($path)
            ? json_decode((string)file_get_contents($path), true)
            : null;

        $summary = is_array($decoded) ? $decoded : rebuildArchiveSummary();
    }

    return $summary;
}
/*
|--------------------------------------------------------------------------
| Archive endpoint
|--------------------------------------------------------------------------
*/
/**
 * HTTP entry point: archives one closed year, verifies the archive and purges
 * the archived rows from the database.
 *
 * Query parameters:
 * - year: defaults to last year; must be >= 2020 and before the current year.
 * - site: optional; when empty, every site returned by archiveSitesForYear()
 *   is processed.
 *
 * The script always terminates here: status 400 for an invalid year, status
 * 500 when any step throws, otherwise a plain-text report.
 *
 * When a site fails after earlier sites were already archived and purged, the
 * summary is still rebuilt so it reflects the archives that were written.
 *
 * NOTE(review): no access check is visible in this function although a plain
 * GET request deletes database rows; confirm that core.php or the web server
 * restricts who may call it.
 */
function runYearArchiveEndpoint(): void {
    $db = brivaciaDb();
    $year = (int)($_GET['year'] ?? ((int)date('Y') - 1));
    $requestedSite = trim((string)($_GET['site'] ?? ''));

    // Values written to log lines may come straight from the query string:
    // collapse control characters so a request cannot forge extra log entries.
    $logSafe = static fn(string $value): string =>
        preg_replace('/[\x00-\x1F\x7F]+/', ' ', $value) ?? '';
    $siteLabel = $requestedSite !== '' ? $logSafe($requestedSite) : 'all';

    if ($year < 2020 || $year >= (int)date('Y')) {
        brivaciaLog('archive/invalid.log', 'invalid year=' . $year);
        http_response_code(400);
        exit(t('archive.invalid.year'));
    }

    // Declared before the try block so the catch branch can tell whether some
    // sites were already archived and purged.
    $files = [];

    try {
        brivaciaLog(
            'archive/year.log',
            'start year=' . $year . ' site=' . $siteLabel
        );

        $sites = $requestedSite !== ''
            ? [$requestedSite]
            : archiveSitesForYear($db, $year);

        foreach ($sites as $site) {
            $file = createYearArchive($db, $year, $site);
            verifyYearArchive($file, $year, $site);
            purgeArchivedYear($db, $year, $site);
            $files[] = basename($file);
            brivaciaLog(
                'archive/year.log',
                'success year=' . $year . ' site=' . $logSafe((string)$site) . ' file=' . basename($file)
            );
        }

        rebuildArchiveSummary();
    } catch (Throwable $e) {
        brivaciaLog(
            'archive/failed.log',
            'failed year=' . $year . ' site=' . $siteLabel . ' error=' . $e->getMessage()
        );

        if ($files !== []) {
            // Rows of the sites handled so far are gone from the database;
            // without a rebuild the cached all-time totals would miss them.
            try {
                rebuildArchiveSummary();
            } catch (Throwable $summaryError) {
                brivaciaLog(
                    'archive/failed.log',
                    'summary rebuild failed error=' . $summaryError->getMessage()
                );
            }
        }

        http_response_code(500);
        exit(t('archive.failed'));
    }

    header('Content-Type: text/plain; charset=utf-8');
    echo t('archive.created') . ' : ' . implode(', ', $files) . PHP_EOL;
    echo t('archive.sqlite.purged') . " $year";
    exit;
}
/**
 * Lists the sites that have hits_daily rows in the given year.
 *
 * @param PDO $db   Database connection.
 * @param int $year Calendar year.
 * @return array List of non-empty site identifiers, ordered by name.
 */
function archiveSitesForYear(PDO $db, int $year): array {
    $start = sprintf('%04d-01-01', $year);
    $end = sprintf('%04d-12-31', $year);

    // The empty string is written as a single-quoted SQL literal: double
    // quotes denote identifiers in standard SQL, and SQLite only accepts them
    // as strings through a compatibility fallback that can be switched off.
    $rows = fetchAll($db, "
        SELECT DISTINCT site
        FROM hits_daily
        WHERE day BETWEEN ? AND ?
        AND site != ''
        ORDER BY site
    ", [$start, $end]);

    $sites = [];
    foreach ($rows as $row) {
        $site = (string)($row['site'] ?? '');
        // Explicit comparison: a bare array_filter() would also drop "0".
        if ($site !== '') {
            $sites[] = $site;
        }
    }

    return $sites;
}
/**
 * Writes the JSON archive of one site for one year and returns its path.
 *
 * The snapshot holds the hits_daily, pages_daily, countries_daily and
 * referrers_daily rows of the site for the year. It is written to a temporary
 * file first and then renamed into place.
 *
 * Safety checks:
 * - an empty snapshot never replaces an archive that already exists (this
 *   would happen when the endpoint is run again after the rows were purged);
 * - encoding, write and rename failures raise an exception instead of letting
 *   the caller verify a stale file and purge the database.
 *
 * @param PDO    $db   Database connection.
 * @param int    $year Year to archive.
 * @param string $site Site identifier (trimmed before use).
 * @return string Path of the archive file that was written.
 * @throws RuntimeException When the site is empty or the archive cannot be
 *                          written safely.
 */
function createYearArchive(PDO $db, int $year, string $site): string {
    $site = trim($site);
    $start = sprintf('%04d-01-01', $year);
    $end = sprintf('%04d-12-31', $year);
    if ($site === '') {
        throw new RuntimeException('Archive site is missing');
    }

    /*
    |--------------------------------------------------------------------------
    | Resolve important page labels before freezing a closed year
    |--------------------------------------------------------------------------
    |
    | Archive files are JSON snapshots. Once a year is archived, its SQLite rows
    | are purged, so raw imported page labels would otherwise stay raw forever in
    | the "all time" dashboard view.
    |
    */
    refreshPageLabelsForRange(
        $db,
        'site = ? AND day BETWEEN ? AND ?',
        [$site, $start, $end],
        100 // NOTE(review): presumably a row limit; confirm in refreshPageLabelsForRange().
    );

    $pages = fetchAll(
        $db,
        'SELECT * FROM pages_daily WHERE site = ? AND day BETWEEN ? AND ? ORDER BY day, views DESC',
        [$site, $start, $end]
    );

    // Store the display URL and cleaned title in the snapshot itself.
    foreach ($pages as &$page) {
        $pageSite = (string)($page['site'] ?? $site);
        $pageKey = (string)($page['page_key'] ?? '');
        $rawTitle = (string)($page['title'] ?? '');
        $rawUrl = (string)($page['url'] ?? '');
        $displayUrl = pageUrl($pageSite, $pageKey, $rawUrl);
        $page['url'] = $displayUrl;
        $page['title'] = cleanPageTitle(
            $rawTitle !== '' ? $rawTitle : $pageKey,
            $pageSite,
            $pageKey,
            $displayUrl
        );
    }
    unset($page);

    $archive = [
        'year' => $year,
        'site' => $site,
        'generated_at' => date('c'),
        'days' => fetchAll(
            $db,
            'SELECT * FROM hits_daily WHERE site = ? AND day BETWEEN ? AND ? ORDER BY day',
            [$site, $start, $end]
        ),
        'pages' => $pages,
        'countries' => fetchAll(
            $db,
            'SELECT * FROM countries_daily WHERE site = ? AND day BETWEEN ? AND ? ORDER BY day, views DESC',
            [$site, $start, $end]
        ),
        'referrers' => fetchAll(
            $db,
            'SELECT * FROM referrers_daily WHERE site = ? AND day BETWEEN ? AND ? ORDER BY day, views DESC',
            [$site, $start, $end]
        ),
    ];

    $file = archiveDir() . '/' . archiveFileName($year, $site);
    $tmp = $file . '.tmp';

    $hasRows = !empty($archive['days'])
        || !empty($archive['pages'])
        || !empty($archive['countries'])
        || !empty($archive['referrers']);
    if (!$hasRows && is_file($file)) {
        // Nothing left in the database for this site/year: keep the archive
        // that is already on disk instead of replacing it with an empty one.
        throw new RuntimeException('Archive has no rows; refusing to overwrite existing archive');
    }

    $json = json_encode(
        $archive,
        JSON_PRETTY_PRINT |
        JSON_UNESCAPED_UNICODE |
        JSON_UNESCAPED_SLASHES
    );
    if ($json === false) {
        throw new RuntimeException('Archive encoding failed: ' . json_last_error_msg());
    }

    $written = file_put_contents($tmp, $json, LOCK_EX);
    clearstatcache(true, $tmp);
    if ($written === false || !is_file($tmp) || filesize($tmp) < 50) {
        if (is_file($tmp)) {
            unlink($tmp);
        }
        throw new RuntimeException('Archive write failed');
    }

    if (!rename($tmp, $file)) {
        // Without this check an older archive at $file could pass
        // verification and the caller would purge rows never archived.
        if (is_file($tmp)) {
            unlink($tmp);
        }
        throw new RuntimeException('Archive rename failed');
    }

    return $file;
}
/**
 * Checks that an archive file exists and has the expected structure.
 *
 * The file must be at least 50 bytes long and decode to an array whose 'year'
 * is strictly equal to $year, whose 'site' equals $site (only checked when
 * $site is not empty), that has a 'generated_at' value and whose days, pages,
 * countries and referrers entries are arrays.
 *
 * @param string $file Path of the archive to check.
 * @param int    $year Expected year.
 * @param string $site Expected site, or '' to skip the site check.
 * @throws RuntimeException When the file is missing/too small or invalid.
 */
function verifyYearArchive(string $file, int $year, string $site = ''): void {
    $present = is_file($file) && filesize($file) >= 50;
    if (!$present) {
        throw new RuntimeException('Archive file missing or empty');
    }

    $decoded = json_decode((string)file_get_contents($file), true);

    $valid = is_array($decoded)
        && ($decoded['year'] ?? null) === $year
        && ($site === '' || (string)($decoded['site'] ?? '') === $site)
        && isset($decoded['generated_at']);

    if ($valid) {
        foreach (['days', 'pages', 'countries', 'referrers'] as $section) {
            if (!isset($decoded[$section]) || !is_array($decoded[$section])) {
                $valid = false;
                break;
            }
        }
    }

    if (!$valid) {
        throw new RuntimeException('Archive verification failed');
    }
}
/**
 * Deletes the rows of one site for one archived year from the daily tables.
 *
 * All deletes run in a single transaction. VACUUM is executed afterwards,
 * outside the transaction handling: it cannot run inside a transaction, and a
 * VACUUM failure must not trigger a rollBack() once the deletes are already
 * committed (that call would throw and hide the real error).
 *
 * @param PDO    $db   Database connection.
 * @param int    $year Year to purge.
 * @param string $site Site identifier (trimmed before use).
 * @throws RuntimeException When the site is empty.
 * @throws Throwable Any database error, after rolling back the deletes.
 */
function purgeArchivedYear(PDO $db, int $year, string $site): void {
    $site = trim($site);
    $start = sprintf('%04d-01-01', $year);
    $end = sprintf('%04d-12-31', $year);
    if ($site === '') {
        throw new RuntimeException('Archive purge site is missing');
    }

    // Fixed list: table names are interpolated into the SQL below, so they
    // must never come from user input.
    $tables = [
        'hits_daily',
        'pages_daily',
        'countries_daily',
        'referrers_daily',
        'seen_daily',
        'visitor_sessions',
    ];

    $db->beginTransaction();
    try {
        foreach ($tables as $table) {
            $db->prepare("DELETE FROM $table WHERE site = ? AND day BETWEEN ? AND ?")
                ->execute([$site, $start, $end]);
        }
        $db->commit();
    } catch (Throwable $e) {
        // The transaction may already be closed (e.g. commit itself failed).
        if ($db->inTransaction()) {
            $db->rollBack();
        }
        throw $e;
    }

    // Reclaim the space of the deleted rows.
    $db->exec('VACUUM');
}
/**
 * Archives and purges every closed (site, year) pair still in hits_daily.
 *
 * A pair qualifies when its year is >= 2020 and earlier than the current
 * year. Each pair is archived, verified and purged in turn. The summary is
 * rebuilt whenever at least one pair was processed, including when a later
 * pair throws, so the cached totals never lag behind rows that were already
 * purged.
 *
 * @param PDO $db Database connection.
 * @return array List of "year-site" labels that were archived.
 * @throws Throwable Whatever createYearArchive(), verifyYearArchive() or
 *                   purgeArchivedYear() throws for the failing pair.
 */
function archiveClosedYears(PDO $db): array {
    $currentYear = (int)date('Y');

    // The empty string is a single-quoted SQL literal; double quotes denote
    // identifiers and only work as strings through a SQLite fallback.
    $rows = fetchAll($db, "
        SELECT
        site,
        CAST(substr(day, 1, 4) AS INTEGER) AS year
        FROM hits_daily
        WHERE CAST(substr(day, 1, 4) AS INTEGER) < ?
        AND site != ''
        GROUP BY site, CAST(substr(day, 1, 4) AS INTEGER)
        ORDER BY year, site
    ", [$currentYear]);

    $archived = [];
    try {
        foreach ($rows as $row) {
            $site = (string)($row['site'] ?? '');
            $year = (int)($row['year'] ?? 0);
            if ($site === '' || $year < 2020 || $year >= $currentYear) {
                continue;
            }

            brivaciaLog('archive/year.log', 'auto start year=' . $year . ' site=' . $site);
            $file = createYearArchive($db, $year, $site);
            verifyYearArchive($file, $year, $site);
            purgeArchivedYear($db, $year, $site);
            brivaciaLog('archive/year.log', 'auto success year=' . $year . ' site=' . $site);
            $archived[] = $year . '-' . $site;
        }
    } finally {
        // Runs on success and on failure: pairs archived before an error are
        // already purged from the database and must appear in the summary.
        if ($archived !== []) {
            rebuildArchiveSummary();
        }
    }

    return $archived;
}