includes/modules/import_matomo_db.php
<?php
declare(strict_types=1);
/*
|--------------------------------------------------------------------------
| Matomo database import module
|--------------------------------------------------------------------------
|
| Contains only the direct Matomo database connector, preview helpers and
| importer. Shared request handling and provider routing stay in api/import.php.
|
*/
function brivacia_matomo_db_connect(array $input): PDO
{
$dsn = sprintf(
'mysql:host=%s;port=%d;dbname=%s;charset=utf8mb4',
$input['db_host'],
$input['db_port'],
$input['db_name']
);
return new PDO($dsn, $input['db_user'], $input['db_password'], [
PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC,
]);
}
function brivacia_matomo_db_site_info(array $input): array
{
$pdo = brivacia_matomo_db_connect($input);
$prefix = $input['db_prefix'];
if (!preg_match('/^[a-zA-Z0-9_]*$/', $prefix)) {
throw new RuntimeException(t('import.matomo.db.errors.invalid_prefix'));
}
$stmt = $pdo->prepare('SELECT name, main_url FROM ' . $prefix . 'site WHERE idsite = ? LIMIT 1');
$stmt->execute([(int)$input['site']]);
$site = $stmt->fetch();
if (!is_array($site)) {
throw new RuntimeException(t('import.matomo.db.errors.site_not_found'));
}
return [
'name' => (string)($site['name'] ?? ''),
'main_url' => (string)($site['main_url'] ?? ''),
];
}
function brivacia_matomo_db_host_regex(string $domain): string
{
$domain = trim($domain);
if ($domain === '') {
return '';
}
if (preg_match('~^https?://~i', $domain)) {
$host = (string)(parse_url($domain, PHP_URL_HOST) ?: '');
} else {
$host = $domain;
}
$host = strtolower(trim($host));
$host = preg_replace('~[/?#].*$~', '', $host) ?? $host;
$host = preg_replace('/^www\./', '', $host) ?? $host;
if (!preg_match('/^[a-z0-9.-]+\.[a-z]{2,}$/i', $host)) {
return '^$';
}
return '^(https?://)?(www\\.)?' . preg_quote($host, '/') . '([/:?#]|$)';
}
function brivacia_matomo_db_visit_domain_sql(array $input, string $visitAlias = 'v'): string
{
if ($input['domain_filter'] === '') {
return '';
}
return '
AND EXISTS (
SELECT 1
FROM ' . $input['db_prefix'] . 'log_link_visit_action ldf
JOIN ' . $input['db_prefix'] . 'log_action adf ON adf.idaction = ldf.idaction_url
WHERE ldf.idvisit = ' . $visitAlias . '.idvisit
AND adf.type = 1
AND adf.name REGEXP :domain_filter
)
';
}
function brivacia_matomo_db_preview(array $input): array
{
$pdo = brivacia_matomo_db_connect($input);
$prefix = $input['db_prefix'];
$domainSql = brivacia_matomo_db_visit_domain_sql($input);
$params = [
':site' => (int)$input['site'],
':from' => $input['from'] . ' 00:00:00',
':to' => $input['to'] . ' 23:59:59',
];
if ($input['domain_filter'] !== '') {
$params[':domain_filter'] = brivacia_matomo_db_host_regex($input['domain_filter']);
}
$stmt = $pdo->prepare('
SELECT
COUNT(*) AS visits,
COUNT(DISTINCT idvisitor) AS unique_visitors
FROM ' . $prefix . 'log_visit v
WHERE v.idsite = :site
AND v.visit_first_action_time BETWEEN :from AND :to
' . $domainSql
);
$stmt->execute($params);
$visits = $stmt->fetch() ?: [];
$pageParams = $params;
$pageDomainSql = '';
if ($input['domain_filter'] !== '') {
$pageDomainSql = 'AND a.name REGEXP :domain_filter';
}
$stmt = $pdo->prepare('
SELECT COUNT(*) AS pageviews
FROM ' . $prefix . 'log_link_visit_action l
JOIN ' . $prefix . 'log_action a ON a.idaction = l.idaction_url
AND a.type = 1
WHERE l.idsite = :site
AND l.server_time BETWEEN :from AND :to
' . $pageDomainSql
);
$stmt->execute($pageParams);
$pageviews = $stmt->fetch() ?: [];
$stmt = $pdo->prepare('
SELECT COUNT(DISTINCT LOWER(location_country)) AS countries
FROM ' . $prefix . 'log_visit v
WHERE v.idsite = :site
AND v.visit_first_action_time BETWEEN :from AND :to
AND location_country IS NOT NULL
AND location_country != ""
' . $domainSql
);
$stmt->execute($params);
$countries = $stmt->fetch() ?: [];
$stmt = $pdo->prepare('
SELECT COUNT(DISTINCT COALESCE(NULLIF(referer_name, ""), "__direct__")) AS referrers
FROM ' . $prefix . 'log_visit v
WHERE v.idsite = :site
AND v.visit_first_action_time BETWEEN :from AND :to
' . $domainSql
);
$stmt->execute($params);
$referrers = $stmt->fetch() ?: [];
$stmt = $pdo->prepare('
SELECT COUNT(DISTINCT a.name) AS pages
FROM ' . $prefix . 'log_link_visit_action l
JOIN ' . $prefix . 'log_action a ON a.idaction = l.idaction_url
AND a.type = 1
WHERE l.idsite = :site
AND l.server_time BETWEEN :from AND :to
' . $pageDomainSql
);
$stmt->execute($pageParams);
$pages = $stmt->fetch() ?: [];
return [
'visits' => max(0, (int)($visits['visits'] ?? 0)),
'unique_visitors' => max(0, (int)($visits['unique_visitors'] ?? 0)),
'pageviews' => max(0, (int)($pageviews['pageviews'] ?? 0)),
'countries' => max(0, (int)($countries['countries'] ?? 0)),
'referrers' => max(0, (int)($referrers['referrers'] ?? 0)),
'pages' => max(0, (int)($pages['pages'] ?? 0)),
];
}
function brivacia_matomo_db_preview_samples(array $input): array
{
$pdo = brivacia_matomo_db_connect($input);
$prefix = $input['db_prefix'];
$domainSql = brivacia_matomo_db_visit_domain_sql($input);
$params = [
':site' => (int)$input['site'],
':from' => $input['from'] . ' 00:00:00',
':to' => $input['to'] . ' 23:59:59',
];
if ($input['domain_filter'] !== '') {
$params[':domain_filter'] = brivacia_matomo_db_host_regex($input['domain_filter']);
}
$pageDomainSql = $input['domain_filter'] !== ''
? 'AND a.name REGEXP :domain_filter'
: '';
$stmt = $pdo->prepare('
SELECT LOWER(location_country) AS country, COUNT(*) AS visits
FROM ' . $prefix . 'log_visit v
WHERE v.idsite = :site
AND v.visit_first_action_time BETWEEN :from AND :to
AND location_country IS NOT NULL
AND location_country != ""
' . $domainSql . '
GROUP BY LOWER(location_country)
ORDER BY visits DESC
LIMIT 10
');
$stmt->execute($params);
$countries = $stmt->fetchAll() ?: [];
$stmt = $pdo->prepare('
SELECT
COALESCE(NULLIF(referer_url, ""), "") AS referrer_url,
COALESCE(NULLIF(referer_name, ""), "__direct__") AS referrer_label,
COUNT(*) AS visits
FROM ' . $prefix . 'log_visit v
WHERE v.idsite = :site
AND v.visit_first_action_time BETWEEN :from AND :to
' . $domainSql . '
GROUP BY
COALESCE(NULLIF(referer_url, ""), ""),
COALESCE(NULLIF(referer_name, ""), "__direct__")
ORDER BY visits DESC
LIMIT 10
');
$stmt->execute($params);
$referrers = $stmt->fetchAll() ?: [];
$stmt = $pdo->prepare('
SELECT
a.name AS page,
COUNT(*) AS views
FROM ' . $prefix . 'log_link_visit_action l
JOIN ' . $prefix . 'log_action a ON a.idaction = l.idaction_url
AND a.type = 1
WHERE l.idsite = :site
AND l.server_time BETWEEN :from AND :to
' . $pageDomainSql . '
GROUP BY a.name
ORDER BY views DESC
LIMIT 10
');
$stmt->execute($params);
$pages = $stmt->fetchAll() ?: [];
return [
'countries' => array_map(static fn ($row) => [
'country' => (string)($row['country'] ?? ''),
'visits' => max(0, (int)($row['visits'] ?? 0)),
], $countries),
'referrers' => array_map(static function ($row) {
$url = trim((string)($row['referrer_url'] ?? ''));
$label = trim((string)($row['referrer_label'] ?? ''));
$host = $url !== '' ? parse_url($url, PHP_URL_HOST) : '';
if (is_string($host) && $host !== '') {
$host = preg_replace('/^www\./', '', strtolower($host));
$referrer = $label !== '' ? $host . ' [' . $label . ']' : $host;
} else {
$referrer = $label !== '' ? $label : '__direct__';
}
return [
'referrer' => $referrer,
'visits' => max(0, (int)($row['visits'] ?? 0)),
];
}, $referrers),
'pages' => array_map(static fn ($row) => [
'page' => (string)($row['page'] ?? ''),
'views' => max(0, (int)($row['views'] ?? 0)),
], $pages),
];
}
function brivacia_matomo_db_page_url(string $value): string
{
$value = trim($value);
if ($value === '') {
return '';
}
if (preg_match('~^https?://~i', $value)) {
return $value;
}
if (str_starts_with($value, '//')) {
return 'https:' . $value;
}
if (str_starts_with($value, '/')) {
return $value;
}
if (preg_match('~^[a-z0-9.-]+\.[a-z]{2,}(/|$)~i', $value)) {
return 'https://' . $value;
}
return '/' . ltrim($value, '/');
}
function brivacia_matomo_db_page_lang(string $url, string $browserLang = ''): string
{
$path = parse_url($url, PHP_URL_PATH);
if (is_string($path) && preg_match('~^/([a-z]{2})(/|$)~i', $path, $m)) {
return strtolower($m[1]);
}
// Never guess the language for the website root.
// Matomo sometimes records "/" before the language redirect.
if ($path === '/' || $path === '') {
return '';
}
return brivacia_import_lang_code($browserLang);
}
function brivacia_matomo_db_referrer(string $url, string $label, int $type = 0, string $keyword = ''): string
{
$label = trim($label);
$keyword = trim($keyword);
$labelLower = mb_strtolower($label, 'UTF-8');
$urlLower = mb_strtolower(trim($url), 'UTF-8');
// Matomo direct entry.
if (
$type === 1 ||
$label === '__direct__' ||
$labelLower === 'direct' ||
$labelLower === 'direct entry' ||
$labelLower === 'entrée directe' ||
$labelLower === 'newtab' ||
$labelLower === 'new tab' ||
$urlLower === 'about:newtab' ||
str_contains($urlLower, 'android-app://com.android.chrome')
) {
return mainSiteHost();
}
// Android Google app.
if (
str_contains($labelLower, 'googlequicksearchbox') ||
str_contains($urlLower, 'com.google.android.googlequicksearchbox')
) {
return 'google.com';
}
// Best case: Matomo has the real referrer URL.
$host = normalizeHost((string)(parse_url($url, PHP_URL_HOST) ?? ''));
if ($host !== '' && !isOwnHost($host)) {
return referrerCanonical($host);
}
// Fallback: Matomo often has a name but no URL.
// Try to match that name against referrers.json labels.
$key = normalizeHost($label !== '' ? $label : $keyword);
if ($key !== '') {
foreach (referrerLabels(true) as $canonical => $rule) {
if (normalizeHost((string)($rule['label'] ?? '')) === $key) {
return (string)$canonical;
}
foreach ((array)($rule['urls'] ?? []) as $knownUrl) {
if (normalizeHost((string)$knownUrl) === $key) {
return (string)$canonical;
}
}
}
}
return mainSiteHost();
}
function brivacia_import_matomo_db(array $input): array
{
$source = brivacia_matomo_db_connect($input);
$target = brivaciaDb();
$prefix = $input['db_prefix'];
$days = brivacia_import_range_days($input['from'], $input['to']);
$domainSql = brivacia_matomo_db_visit_domain_sql($input);
$pageDomainSql = $input['domain_filter'] !== '' ? 'AND a.name REGEXP :domain_filter' : '';
$params = [
':site' => (int)$input['site'],
':from' => $input['from'] . ' 00:00:00',
':to' => $input['to'] . ' 23:59:59',
];
if ($input['domain_filter'] !== '') {
$params[':domain_filter'] = brivacia_matomo_db_host_regex($input['domain_filter']);
}
$stats = [
'days' => count($days),
// Imported totals
'unique_visitors' => 0,
'visits' => 0,
'pageviews' => 0,
// Internal counters
'hits' => 0,
'countries' => 0,
'referrers' => 0,
'pages' => 0,
];
$target->beginTransaction();
/*
|--------------------------------------------------------------------------
| Replace existing imported data for this Brivacia site only
|--------------------------------------------------------------------------
|
| Matomo may track several domains under one site ID. Brivacia stores the
| selected destination site on every aggregate table, so only the matching
| site/date range is cleared before importing fresh values.
|
*/
if ($input['replace']) {
if ($input['import_hits']) {
$target->prepare('DELETE FROM hits_daily WHERE site = ? AND day BETWEEN ? AND ?')
->execute([$input['brivacia_site'], $input['from'], $input['to']]);
$target->prepare('DELETE FROM seen_daily WHERE site = ? AND day BETWEEN ? AND ?')
->execute([$input['brivacia_site'], $input['from'], $input['to']]);
}
if ($input['import_countries']) {
$target->prepare('DELETE FROM countries_daily WHERE site = ? AND day BETWEEN ? AND ?')
->execute([$input['brivacia_site'], $input['from'], $input['to']]);
}
if ($input['import_referrers']) {
$target->prepare('DELETE FROM referrers_daily WHERE site = ? AND day BETWEEN ? AND ?')
->execute([$input['brivacia_site'], $input['from'], $input['to']]);
}
if ($input['import_pages']) {
$target->prepare('DELETE FROM pages_daily WHERE site = ? AND day BETWEEN ? AND ?')
->execute([$input['brivacia_site'], $input['from'], $input['to']]);
}
}
/* Prepared upserts for site-scoped aggregate tables. */
$upsertHits = $target->prepare('
INSERT INTO hits_daily(site, day, unique_visitors, visits, pageviews, bots)
VALUES(:site, :day, :unique_visitors, :visits, :pageviews, 0)
ON CONFLICT(site, day) DO UPDATE SET
unique_visitors = excluded.unique_visitors,
visits = excluded.visits,
pageviews = excluded.pageviews
');
$upsertCountry = $target->prepare('
INSERT INTO countries_daily(site, day, country, views)
VALUES(:site, :day, :country, :views)
ON CONFLICT(site, day, country) DO UPDATE SET views = excluded.views
');
$upsertReferrer = $target->prepare('
INSERT INTO referrers_daily(site, day, referrer, views)
VALUES(:site, :day, :referrer, :views)
ON CONFLICT(site, day, referrer) DO UPDATE SET views = excluded.views
');
$upsertPage = $target->prepare('
INSERT INTO pages_daily(day, site, page_key, title, url, views, page_resolved)
VALUES(:day, :site, :page_key, :title, :url, :views, :page_resolved)
ON CONFLICT(day, site, page_key) DO UPDATE SET
title = excluded.title,
url = excluded.url,
views = excluded.views,
page_resolved = excluded.page_resolved
');
if ($input['import_hits']) {
$stmt = $source->prepare('
SELECT
DATE(l.server_time) AS day,
COUNT(DISTINCT l.idvisit) AS visits,
COUNT(DISTINCT v.idvisitor) AS unique_visitors
FROM ' . $prefix . 'log_link_visit_action l
JOIN ' . $prefix . 'log_visit v ON v.idvisit = l.idvisit
JOIN ' . $prefix . 'log_action a ON a.idaction = l.idaction_url
AND a.type = 1
WHERE l.idsite = :site
AND l.server_time BETWEEN :from AND :to
' . $pageDomainSql . '
GROUP BY DATE(l.server_time)
');
$stmt->execute($params);
$hitsByDay = [];
foreach ($stmt->fetchAll() ?: [] as $row) {
$hitsByDay[(string)$row['day']] = [
'visits' => max(0, (int)($row['visits'] ?? 0)),
'unique_visitors' => max(0, (int)($row['unique_visitors'] ?? 0)),
];
}
$stmt = $source->prepare('
SELECT
DATE(l.server_time) AS day,
COUNT(*) AS pageviews
FROM ' . $prefix . 'log_link_visit_action l
JOIN ' . $prefix . 'log_action a ON a.idaction = l.idaction_url
AND a.type = 1
WHERE l.idsite = :site
AND l.server_time BETWEEN :from AND :to
' . $pageDomainSql . '
GROUP BY DATE(l.server_time)
');
$stmt->execute($params);
$pageviewsByDay = [];
foreach ($stmt->fetchAll() ?: [] as $row) {
$pageviewsByDay[(string)$row['day']] = max(0, (int)($row['pageviews'] ?? 0));
}
foreach ($days as $day) {
$uniqueVisitors = $hitsByDay[$day]['unique_visitors'] ?? 0;
$visits = $hitsByDay[$day]['visits'] ?? 0;
$pageviews = $pageviewsByDay[$day] ?? 0;
if ($uniqueVisitors <= 0 && $visits <= 0 && $pageviews <= 0) {
continue;
}
$stats['unique_visitors'] += $uniqueVisitors;
$stats['visits'] += $visits;
$stats['pageviews'] += $pageviews;
$upsertHits->execute([
':site' => $input['brivacia_site'],
':day' => $day,
':unique_visitors' => $uniqueVisitors,
':visits' => $visits,
':pageviews' => $pageviews,
]);
$stats['hits']++;
}
}
if ($input['import_countries']) {
$stmt = $source->prepare('
SELECT
DATE(l.server_time) AS day,
LOWER(v.location_country) AS country,
COUNT(DISTINCT l.idvisit) AS views
FROM ' . $prefix . 'log_link_visit_action l
JOIN ' . $prefix . 'log_visit v ON v.idvisit = l.idvisit
JOIN ' . $prefix . 'log_action a ON a.idaction = l.idaction_url
AND a.type = 1
WHERE l.idsite = :site
AND l.server_time BETWEEN :from AND :to
AND v.location_country IS NOT NULL
AND v.location_country != ""
' . $pageDomainSql . '
GROUP BY DATE(l.server_time), LOWER(v.location_country)
');
$stmt->execute($params);
foreach ($stmt->fetchAll() ?: [] as $row) {
$country = normalizeCountryCode((string)($row['country'] ?? ''));
$views = max(0, (int)($row['views'] ?? 0));
if ($views <= 0) {
continue;
}
$upsertCountry->execute([
':site' => $input['brivacia_site'],
':day' => (string)$row['day'],
':country' => $country,
':views' => $views,
]);
$stats['countries']++;
}
}
if ($input['import_referrers']) {
$stmt = $source->prepare('
SELECT
DATE(l.server_time) AS day,
COALESCE(v.referer_type, 0) AS referer_type,
COALESCE(NULLIF(v.referer_url, ""), "") AS referer_url,
COALESCE(NULLIF(v.referer_name, ""), "__direct__") AS referer_name,
COALESCE(NULLIF(v.referer_keyword, ""), "") AS referer_keyword,
COUNT(DISTINCT l.idvisit) AS views
FROM ' . $prefix . 'log_link_visit_action l
JOIN ' . $prefix . 'log_visit v ON v.idvisit = l.idvisit
JOIN ' . $prefix . 'log_action a ON a.idaction = l.idaction_url
AND a.type = 1
WHERE l.idsite = :site
AND l.server_time BETWEEN :from AND :to
' . $pageDomainSql . '
GROUP BY
DATE(l.server_time),
COALESCE(v.referer_type, 0),
COALESCE(NULLIF(v.referer_url, ""), ""),
COALESCE(NULLIF(v.referer_name, ""), "__direct__"),
COALESCE(NULLIF(v.referer_keyword, ""), "")
');
$stmt->execute($params);
$referrersByDay = [];
foreach ($stmt->fetchAll() ?: [] as $row) {
$referrer = brivacia_matomo_db_referrer(
(string)($row['referer_url'] ?? ''),
(string)($row['referer_name'] ?? ''),
(int)($row['referer_type'] ?? 0),
(string)($row['referer_keyword'] ?? '')
);
$day = (string)$row['day'];
$views = max(0, (int)($row['views'] ?? 0));
if ($referrer === '' || $views <= 0) {
continue;
}
$referrer = mb_substr($referrer, 0, 120, 'UTF-8');
if (!isset($referrersByDay[$day][$referrer])) {
$referrersByDay[$day][$referrer] = 0;
}
$referrersByDay[$day][$referrer] += $views;
}
foreach ($referrersByDay as $day => $referrers) {
foreach ($referrers as $referrer => $views) {
$upsertReferrer->execute([
':site' => $input['brivacia_site'],
':day' => $day,
':referrer' => $referrer,
':views' => $views,
]);
$stats['referrers']++;
}
}
}
if ($input['import_pages']) {
$stmt = $source->prepare('
SELECT
DATE(l.server_time) AS day,
a.name AS page,
LOWER(COALESCE(v.location_browser_lang, "")) AS browser_lang,
COUNT(*) AS views
FROM ' . $prefix . 'log_link_visit_action l
JOIN ' . $prefix . 'log_visit v ON v.idvisit = l.idvisit
JOIN ' . $prefix . 'log_action a ON a.idaction = l.idaction_url
AND a.type = 1
WHERE l.idsite = :site
AND l.server_time BETWEEN :from AND :to
' . $pageDomainSql . '
GROUP BY DATE(l.server_time), a.name, LOWER(COALESCE(v.location_browser_lang, ""))
');
$stmt->execute($params);
foreach ($stmt->fetchAll() ?: [] as $row) {
$label = trim((string)($row['page'] ?? ''));
$views = max(0, (int)($row['views'] ?? 0));
if ($label === '' || $views <= 0) {
continue;
}
$url = brivacia_matomo_db_page_url($label);
$lang = brivacia_matomo_db_page_lang($url, (string)($row['browser_lang'] ?? ''));
$pageKey = brivacia_import_page_key($url, $label, $lang);
$title = mb_substr($pageKey !== '' ? $pageKey : $label, 0, 250, 'UTF-8');
$upsertPage->execute([
':day' => (string)$row['day'],
':site' => $input['brivacia_site'],
':page_key' => mb_substr($pageKey, 0, 500, 'UTF-8'),
':page_resolved' => 0,
':title' => $title,
':url' => mb_substr($url, 0, 1000, 'UTF-8'),
':views' => $views,
]);
$stats['pages']++;
}
}
$target->commit();
brivaciaLog(
'import/import.log',
'Import done | Provider: matomo_db' .
' | Site: ' . $input['site'] .
' | From: ' . $input['from'] .
' | To: ' . $input['to'] .
' | Hits: ' . $stats['hits'] .
' | Countries: ' . $stats['countries'] .
' | Referrers: ' . $stats['referrers'] .
' | Pages: ' . $stats['pages']
);
return $stats;
}