Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions client/dive-common/apispec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,78 @@ export interface SegmentationStatusResponse {
ready?: boolean;
}

/**
* Text Query Types for open-vocabulary detection/segmentation
*/

/**
 * One object found by a text query.
 *
 * `box`, `score` and `label` are always present; the polygon and the
 * low-resolution mask are optional extras.
 */
export interface TextQueryDetection {
  /** Label/class name of the detection (often the query text) */
  label: string;
  /** Confidence score of the detection */
  score: number;
  /** Bounding box given as [x1, y1, x2, y2] */
  box: [number, number, number, number];
  /** Polygon outline given as a list of [x, y] pairs */
  polygon?: [number, number][];
  /** Low-resolution mask that can be passed back for refinement */
  lowResMask?: number[][];
}

/**
 * Parameters of a text query run against a single image.
 *
 * Only `imagePath` and `text` are required; every other field is optional.
 */
export interface TextQueryRequest {
  /** Text query describing what to find (e.g., "fish", "person swimming") */
  text: string;
  /** Path to the image file to query */
  imagePath: string;
  /** Confidence threshold applied to detections (default: 0.3) */
  boxThreshold?: number;
  /** Maximum number of detections to return (default: 10) */
  maxDetections?: number;
  /** Optional boxes to refine, each given as [x1, y1, x2, y2] */
  boxes?: [number, number, number, number][];
  /** Optional masks to refine */
  masks?: number[][][];
  /** Optional keypoints for refinement, each given as [x, y] */
  points?: [number, number][];
  /** One label per point: 1 for foreground, 0 for background */
  pointLabels?: number[];
}

/**
 * Result of a text query.
 *
 * `success` is always set; the remaining fields are optional and carry
 * either the failure message or the query results.
 */
export interface TextQueryResponse {
  /** Whether the query succeeded */
  success: boolean;
  /** Error message, present when the query failed */
  error?: string;
  /** The original query text */
  query?: string;
  /** Detections found by the query */
  detections?: TextQueryDetection[];
  /** Whether a fallback method was used (no native text support) */
  fallback?: boolean;
}

/**
 * Parameters for refining previously returned detections on one image.
 */
export interface RefineDetectionsRequest {
  /** Path to the image file the detections belong to */
  imagePath: string;
  /** Detections that should be refined */
  detections: TextQueryDetection[];
  /** Whether to include refined masks in the response */
  refineMasks?: boolean;
  /** Optional additional keypoints for refinement, each given as [x, y] */
  points?: [number, number][];
  /** One label per additional point: 1 for foreground, 0 for background */
  pointLabels?: number[];
}

/**
 * Result of a refinement request.
 */
export interface RefineDetectionsResponse {
  /** Whether the refinement succeeded */
  success: boolean;
  /** Refined detections */
  detections?: TextQueryDetection[];
  /** Error message, present when the refinement failed */
  error?: string;
}

export {
provideApi,
useApi,
Expand Down
179 changes: 179 additions & 0 deletions client/dive-common/components/EditorMenu.vue
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ export default defineComponent({
'set-annotation-state',
'update:tail-settings',
'update:show-user-created-icon',
'text-query-init',
'text-query',
'text-query-all-frames',
],
setup(props, { emit }) {
const toolTimeTimeout = ref<number | null>(null);
Expand All @@ -99,6 +102,59 @@ export default defineComponent({
localStorage.setItem(STORAGE_KEY, String(value));
});

// Text query state
// Visibility of the text query dialog (bound to the v-dialog's v-model).
const textQueryDialogOpen = ref(false);
// Text typed by the user; trimmed before it is emitted as the query.
const textQueryInput = ref('');
// Disables the form controls and drives the Search button spinner.
// NOTE(review): submitTextQuery sets and clears this within one synchronous
// call, so the loading state is presumably never rendered — confirm intent.
const textQueryLoading = ref(false);
// Slider value that is emitted as `boxThreshold` with the query.
const textQueryThreshold = ref(0.3);
// True from the moment the dialog opens until onTextQueryServiceReady is
// called; while true the dialog shows a progress spinner instead of the form.
const textQueryInitializing = ref(false);
// Non-empty when service initialization failed; shown in place of the form.
const textQueryServiceError = ref('');
// When checked, the query is emitted as 'text-query-all-frames' instead of
// 'text-query'.
const textQueryAllFrames = ref(false);

/**
 * Open the text query dialog with a clean form and ask the parent to
 * initialize the text query service by emitting 'text-query-init'.
 *
 * The dialog stays in its "initializing" state until
 * onTextQueryServiceReady is called.
 */
const openTextQueryDialog = () => {
  // This function is also bound to the 't' keyboard shortcut, which can fire
  // while the dialog is already open. Without this guard a second call would
  // wipe the text the user has typed and request initialization again.
  if (textQueryDialogOpen.value) {
    return;
  }
  textQueryInput.value = '';
  textQueryServiceError.value = '';
  textQueryAllFrames.value = false;
  textQueryInitializing.value = true;
  textQueryDialogOpen.value = true;
  emit('text-query-init');
};

/**
 * Hide the text query dialog and put every piece of form state (input text,
 * service error, initializing flag, all-frames toggle) back to its idle
 * value. The confidence threshold is intentionally left as the user set it.
 */
const closeTextQueryDialog = () => {
  // Reset the form contents first ...
  textQueryInput.value = '';
  textQueryAllFrames.value = false;
  // ... then the service status indicators ...
  textQueryServiceError.value = '';
  textQueryInitializing.value = false;
  // ... and finally hide the dialog itself.
  textQueryDialogOpen.value = false;
};

/**
 * Record the outcome of the text query service initialization.
 *
 * @param success whether the service initialized successfully
 * @param error optional failure message; a generic message is used when it
 *   is missing or empty
 */
const onTextQueryServiceReady = (success: boolean, error?: string) => {
  // Initialization has finished either way, so stop showing the spinner.
  textQueryInitializing.value = false;
  if (success) {
    return;
  }
  textQueryServiceError.value = error || 'Text query service is not available';
};

/**
 * Emit the current query to the parent and close the dialog.
 *
 * Does nothing when the input is empty or whitespace-only. Emits
 * 'text-query-all-frames' when the all-frames option is checked, otherwise
 * 'text-query'; both carry `{ text, boxThreshold }`.
 */
const submitTextQuery = () => {
  const text = textQueryInput.value.trim();
  if (!text) {
    return;
  }
  // The loading flag is raised and lowered within this same synchronous call.
  textQueryLoading.value = true;
  const payload = {
    text,
    boxThreshold: textQueryThreshold.value,
  };
  if (textQueryAllFrames.value) {
    emit('text-query-all-frames', payload);
  } else {
    emit('text-query', payload);
  }
  closeTextQueryDialog();
  textQueryLoading.value = false;
};

const modeToolTips = {
Creating: {
rectangle: 'Drag to draw rectangle. Press ESC to exit.',
Expand Down Expand Up @@ -151,6 +207,10 @@ export default defineComponent({

/**
 * Keyboard bindings for the editor menu: every binding contributed by the
 * edit buttons, followed by the 't' shortcut that opens the text query dialog.
 */
const mousetrap = computed((): Mousetrap[] => {
  const editButtonBindings = flatten(
    editButtons.value.map((button) => button.mousetrap || []),
  );
  const textQueryBinding: Mousetrap = {
    bind: 't',
    handler: () => openTextQueryDialog(),
  };
  return [...editButtonBindings, textQueryBinding];
});

/** The edit button currently flagged as active, or the first button when none is. */
const activeEditButton = computed(() => {
  const buttons = editButtons.value;
  const active = buttons.find((button) => button.active);
  return active || buttons[0];
});
Expand Down Expand Up @@ -225,6 +285,18 @@ export default defineComponent({
activeEditButton,
editButtonsMenuKey,
activeSegmentationRecipe,
// Text query
textQueryDialogOpen,
textQueryInput,
textQueryLoading,
textQueryThreshold,
textQueryInitializing,
textQueryServiceError,
textQueryAllFrames,
openTextQueryDialog,
closeTextQueryDialog,
onTextQueryServiceReady,
submitTextQuery,
};
},
});
Expand Down Expand Up @@ -366,6 +438,16 @@ export default defineComponent({
</v-icon>
</v-btn>
</template>
<!-- Text Query button -->
<v-btn
outlined
class="mx-1"
small
@click="openTextQueryDialog"
>
<pre>T:</pre>
<v-icon>mdi-text-search</v-icon>
</v-btn>
<!-- Segmentation Reset button -->
<template v-if="activeSegmentationRecipe && editingMode === 'Point'">
<v-divider
Expand Down Expand Up @@ -403,6 +485,103 @@ export default defineComponent({
@update:show-user-created-icon="$emit('update:show-user-created-icon', $event)"
/>
</div>

<!-- Text Query Dialog -->
<v-dialog
v-model="textQueryDialogOpen"
max-width="500"
>
<v-card>
<v-card-title class="text-h6">
<v-icon left>
mdi-text-search
</v-icon>
Text Query
</v-card-title>
<v-card-text>
<!-- Loading state while initializing service -->
<div
v-if="textQueryInitializing"
class="text-center py-4"
>
<v-progress-circular
indeterminate
color="primary"
size="48"
/>
<p class="text-body-2 mt-3">
Loading text query model...
</p>
</div>
<!-- Error state if service failed to initialize -->
<div
v-else-if="textQueryServiceError"
class="text-center py-4"
>
<v-icon
color="error"
size="48"
>
mdi-alert-circle
</v-icon>
<p class="text-body-2 mt-3 error--text">
{{ textQueryServiceError }}
</p>
</div>
<!-- Normal input form when service is ready -->
<template v-else>
<p class="text-body-2 mb-3">
Enter a description of objects to find in the current frame.
</p>
<v-text-field
v-model="textQueryInput"
label="Object description"
placeholder="e.g., fish swimming near coral"
outlined
dense
autofocus
:disabled="textQueryLoading"
@keyup.enter="submitTextQuery"
/>
<v-slider
v-model="textQueryThreshold"
label="Confidence threshold"
min="0.1"
max="0.9"
step="0.05"
thumb-label
:disabled="textQueryLoading"
/>
<v-checkbox
v-model="textQueryAllFrames"
label="Apply to all frames"
hint="Run across all frames instead of only the current (this will run as a job)"
persistent-hint
:disabled="textQueryLoading"
/>
</template>
</v-card-text>
<v-card-actions>
<v-spacer />
<v-btn
text
:disabled="textQueryLoading"
@click="closeTextQueryDialog"
>
{{ textQueryServiceError ? 'Close' : 'Cancel' }}
</v-btn>
<v-btn
v-if="!textQueryInitializing && !textQueryServiceError"
color="primary"
:loading="textQueryLoading"
:disabled="!textQueryInput.trim() || textQueryLoading"
@click="submitTextQuery"
>
Search
</v-btn>
</v-card-actions>
</v-card>
</v-dialog>
</v-row>
</template>

Expand Down
17 changes: 17 additions & 0 deletions client/dive-common/components/Viewer.vue
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,17 @@ export default defineComponent({
const controlsRef = ref();
const controlsHeight = ref(0);
const controlsCollapsed = ref(false);
const editorMenuRef = ref();

/**
 * Relay the text query service initialization result to the EditorMenu
 * component through its template ref.
 *
 * Silently does nothing when the ref is not populated or the component does
 * not expose `onTextQueryServiceReady`.
 *
 * @param success whether the service initialized successfully
 * @param error optional failure message to forward
 */
function onTextQueryServiceReady(success: boolean, error?: string) {
  const menu = editorMenuRef.value;
  if (!menu?.onTextQueryServiceReady) {
    return;
  }
  menu.onTextQueryServiceReady(success, error);
}

const sideBarCollapsed = ref(false);
// Sidebar mode: 'left', 'bottom', or 'collapsed'
Expand Down Expand Up @@ -1190,6 +1201,8 @@ export default defineComponent({
controlsHeight,
controlsCollapsed,
sideBarCollapsed,
editorMenuRef,
onTextQueryServiceReady,
sidebarMode,
cycleSidebarMode,
sidebarModeIcon,
Expand Down Expand Up @@ -1372,6 +1385,7 @@ export default defineComponent({
</v-tooltip>

<EditorMenu
ref="editorMenuRef"
v-bind="{
editingMode,
visibleModes,
Expand All @@ -1387,6 +1401,9 @@ export default defineComponent({
:show-user-created-icon.sync="clientSettings.annotatorPreferences.showUserCreatedIcon"
@set-annotation-state="handler.setAnnotationState"
@exit-edit="handler.trackAbort"
@text-query-init="$emit('text-query-init')"
@text-query="$emit('text-query', $event)"
@text-query-all-frames="$emit('text-query-all-frames', $event)"
>
<template slot="delete-controls">
<delete-controls
Expand Down
43 changes: 43 additions & 0 deletions client/platform/desktop/backend/ipcService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,49 @@ export default function register() {
return { ready: segService.isSegmentationReady() };
});

/**
 * IPC handler 'segmentation-text-query'.
 *
 * Initializes the interactive segmentation service first when it is not yet
 * ready, then forwards the request to `textQuery` and returns its response.
 * A rejection from initialization or from the query propagates to the
 * invoking renderer.
 *
 * The argument shape mirrors TextQueryRequest in dive-common/apispec.ts,
 * including the optional `masks` field.
 */
ipcMain.handle('segmentation-text-query', async (_, args: {
  imagePath: string;
  text: string;
  boxThreshold?: number;
  maxDetections?: number;
  boxes?: [number, number, number, number][];
  points?: [number, number][];
  pointLabels?: number[];
  masks?: number[][][];
}) => {
  const segService = getInteractiveServiceManager();

  // Auto-initialize if not ready
  if (!segService.isSegmentationReady()) {
    await segService.initialize(settings.get());
  }

  const response = await segService.textQuery(args);
  return response;
});

/**
 * IPC handler 'segmentation-refine'.
 *
 * Initializes the interactive segmentation service first when it is not yet
 * ready, then forwards the request to `refineDetections` and returns its
 * response. A rejection from initialization or from the refinement
 * propagates to the invoking renderer.
 *
 * The argument shape mirrors RefineDetectionsRequest in
 * dive-common/apispec.ts; each detection mirrors TextQueryDetection,
 * including the optional `lowResMask` field.
 */
ipcMain.handle('segmentation-refine', async (_, args: {
  imagePath: string;
  detections: {
    box: [number, number, number, number];
    polygon?: [number, number][];
    score: number;
    label: string;
    lowResMask?: number[][];
  }[];
  points?: [number, number][];
  pointLabels?: number[];
  refineMasks?: boolean;
}) => {
  const segService = getInteractiveServiceManager();

  // Auto-initialize if not ready
  if (!segService.isSegmentationReady()) {
    await segService.initialize(settings.get());
  }

  const response = await segService.refineDetections(args);
  return response;
});

/**
* Interactive Stereo Service
*/
Expand Down
Loading
Loading