api/v0: implement similarity search for media

Use a very naive approach: define similarity as the number of shared
tags between two media pieces. It can be implemented in SQL and produces
decent results.
This commit is contained in:
Lucas Gabriel Vuotto 2025-04-29 12:08:22 +00:00
parent 29aa172391
commit c625ee860a
2 changed files with 52 additions and 0 deletions

View file

@ -69,6 +69,8 @@ sub startup ($self)
$r->get("/media")->to("media#list")->name("list_media"); $r->get("/media")->to("media#list")->name("list_media");
$r->get("/media/<media_id:num>")->to("media#show")->name("show_media"); $r->get("/media/<media_id:num>")->to("media#show")->name("show_media");
$r->get("/media/similar/<media_id:num>")->to("media#similar")
->name("search_similar_media");
$r->get("/tag")->to("tags#show")->name("show_tag"); $r->get("/tag")->to("tags#show")->name("show_tag");
$r->get("/tags")->to("tags#list")->name("list_tags"); $r->get("/tags")->to("tags#list")->name("list_tags");

View file

@ -119,4 +119,54 @@ sub show ($self)
}); });
} }
# Lists the media that share the most tags with a given media.
#
# Similarity between two media is the number of tags they have in
# common. The reference media is taken from the "media_id" stash value.
#
# Executes, roughly,
#   SELECT *, COUNT(tag_id) AS similarity_score FROM tagged_media_view
#     WHERE tag_id IN (
#       SELECT tag_id FROM <MediaTag source> WHERE media_id = m LIMIT 100
#     ) AND media_id != m
#     GROUP BY media_id
#     ORDER BY similarity_score DESC, media_id ASC
#     LIMIT 6
#
# Renders {media => [...]} as JSON, each entry carrying the media
# columns plus its similarity_score, or a 404 JSON error when no media
# has the requested id.
sub similar ($self)
{
	# Upper bound on how many tags of the reference media are compared.
	my $max_tags = 100;
	# Upper bound on how many similar media are returned.
	my $max_results = 6;

	my $media_id = $self->stash("media_id");
	my $media = $self->schema->resultset("Media")
	    ->single({id => $media_id});
	return $self->render(
		json => {error => "Media not found"},
		status => 404,
	) if !defined($media);

	# Subquery yielding the tag ids attached to the reference media.
	my %tag_attrs = (
		select => ["tag_id"],
		rows => $max_tags,
	);
	my $tags = $self->schema->resultset("MediaTag")
	    ->search({media_id => $media_id}, \%tag_attrs)->as_query;

	# Every other media carrying at least one of those tags.
	my %search = (
		media_id => {"!=", $media_id},
		tag_id => {"-in", $tags},
	);
	my %attrs = (
		"+select" => [{count => "tag_id", -as => "similarity_score"}],
		"+as" => ["similarity_score"],
		group_by => "media_id",
		# Break ties on media_id: without it, equally scored media come
		# back in unspecified order and, because of the row limit, the
		# set of results could change between identical requests.
		order_by => [
			{-desc => "similarity_score"},
			{-asc => "media_id"},
		],
		rows => $max_results,
	);

	my @media = map +{
		id => $_->media_id,
		storage_id => $_->media_storage_id,
		filename => $_->media_filename,
		content_type => $_->media_content_type,
		upload_datetime => $_->media_upload_datetime,
		similarity_score => $_->get_column("similarity_score"),
	}, $self->schema->resultset("TaggedMediaView")
	    ->search(\%search, \%attrs)->all;

	return $self->render(json => {
		media => \@media,
	});
}
1; 1;