@@ -4,169 +4,71 @@ import { createLogger } from '@sim/logger'
44import { task } from '@trigger.dev/sdk'
55import { and , inArray , lt } from 'drizzle-orm'
66import { type CleanupJobPayload , resolveCleanupScope } from '@/lib/billing/cleanup-dispatcher'
7+ import {
8+ batchDeleteByWorkspaceAndTimestamp ,
9+ chunkedBatchDelete ,
10+ type TableCleanupResult ,
11+ } from '@/lib/cleanup/batch-delete'
712import { snapshotService } from '@/lib/logs/execution/snapshot/service'
813import { isUsingCloudStorage , StorageService } from '@/lib/uploads'
914import { deleteFileMetadata } from '@/lib/uploads/server/metadata'
1015
// Scoped logger for the log-cleanup background job; tag appears in every line below.
const logger = createLogger('CleanupLogs')
1217
13- const BATCH_SIZE = 2000
14- const MAX_BATCHES_PER_TIER = 10
15-
16- interface TierResults {
17- total : number
18- deleted : number
19- deleteFailed : number
18+ interface FileDeleteStats {
2019 filesTotal : number
2120 filesDeleted : number
2221 filesDeleteFailed : number
2322}
2423
25- function emptyTierResults ( ) : TierResults {
26- return {
27- total : 0 ,
28- deleted : 0 ,
29- deleteFailed : 0 ,
30- filesTotal : 0 ,
31- filesDeleted : 0 ,
32- filesDeleteFailed : 0 ,
33- }
34- }
35-
36- async function deleteExecutionFiles ( files : unknown , results : TierResults ) : Promise < void > {
24+ async function deleteExecutionFiles ( files : unknown , stats : FileDeleteStats ) : Promise < void > {
3725 if ( ! isUsingCloudStorage ( ) || ! files || ! Array . isArray ( files ) ) return
3826
3927 const keys = files . filter ( ( f ) => f && typeof f === 'object' && f . key ) . map ( ( f ) => f . key as string )
40- results . filesTotal += keys . length
28+ stats . filesTotal += keys . length
4129
4230 await Promise . all (
4331 keys . map ( async ( key ) => {
4432 try {
4533 await StorageService . deleteFile ( { key, context : 'execution' } )
4634 await deleteFileMetadata ( key )
47- results . filesDeleted ++
35+ stats . filesDeleted ++
4836 } catch ( fileError ) {
49- results . filesDeleteFailed ++
37+ stats . filesDeleteFailed ++
5038 logger . error ( `Failed to delete file ${ key } :` , { fileError } )
5139 }
5240 } )
5341 )
5442}
5543
56- async function cleanupTier (
57- workspaceIds : string [ ] ,
58- retentionDate : Date ,
59- label : string
60- ) : Promise < TierResults > {
61- const results = emptyTierResults ( )
62- if ( workspaceIds . length === 0 ) return results
63-
64- let batchesProcessed = 0
65- let hasMore = true
66-
67- while ( hasMore && batchesProcessed < MAX_BATCHES_PER_TIER ) {
68- const batch = await db
69- . select ( {
70- id : workflowExecutionLogs . id ,
71- files : workflowExecutionLogs . files ,
72- } )
73- . from ( workflowExecutionLogs )
74- . where (
75- and (
76- inArray ( workflowExecutionLogs . workspaceId , workspaceIds ) ,
77- lt ( workflowExecutionLogs . startedAt , retentionDate )
78- )
79- )
80- . limit ( BATCH_SIZE )
81-
82- results . total += batch . length
83-
84- if ( batch . length === 0 ) {
85- hasMore = false
86- break
87- }
88-
89- for ( const log of batch ) {
90- await deleteExecutionFiles ( log . files , results )
91- }
92-
93- const logIds = batch . map ( ( log ) => log . id )
94- try {
95- const deleted = await db
96- . delete ( workflowExecutionLogs )
97- . where ( inArray ( workflowExecutionLogs . id , logIds ) )
98- . returning ( { id : workflowExecutionLogs . id } )
99-
100- results . deleted += deleted . length
101- } catch ( deleteError ) {
102- results . deleteFailed += logIds . length
103- logger . error ( `Batch delete failed for ${ label } :` , { deleteError } )
104- }
105-
106- batchesProcessed ++
107- hasMore = batch . length === BATCH_SIZE
108-
109- logger . info ( `[${ label } ] Batch ${ batchesProcessed } : ${ batch . length } logs processed` )
110- }
111-
112- return results
113- }
114-
115- interface JobLogCleanupResults {
116- deleted : number
117- deleteFailed : number
118- }
119-
120- async function cleanupJobExecutionLogsTier (
44+ async function cleanupWorkflowExecutionLogs (
12145 workspaceIds : string [ ] ,
12246 retentionDate : Date ,
12347 label : string
124- ) : Promise < JobLogCleanupResults > {
125- const results : JobLogCleanupResults = { deleted : 0 , deleteFailed : 0 }
126- if ( workspaceIds . length === 0 ) return results
127-
128- let batchesProcessed = 0
129- let hasMore = true
130-
131- while ( hasMore && batchesProcessed < MAX_BATCHES_PER_TIER ) {
132- const batch = await db
133- . select ( { id : jobExecutionLogs . id } )
134- . from ( jobExecutionLogs )
135- . where (
136- and (
137- inArray ( jobExecutionLogs . workspaceId , workspaceIds ) ,
138- lt ( jobExecutionLogs . startedAt , retentionDate )
48+ ) : Promise < TableCleanupResult & FileDeleteStats > {
49+ const fileStats : FileDeleteStats = { filesTotal : 0 , filesDeleted : 0 , filesDeleteFailed : 0 }
50+
51+ const dbStats = await chunkedBatchDelete ( {
52+ tableDef : workflowExecutionLogs ,
53+ workspaceIds,
54+ tableName : `${ label } /workflow_execution_logs` ,
55+ selectChunk : ( chunkIds , limit ) =>
56+ db
57+ . select ( { id : workflowExecutionLogs . id , files : workflowExecutionLogs . files } )
58+ . from ( workflowExecutionLogs )
59+ . where (
60+ and (
61+ inArray ( workflowExecutionLogs . workspaceId , chunkIds ) ,
62+ lt ( workflowExecutionLogs . startedAt , retentionDate )
63+ )
13964 )
140- )
141- . limit ( BATCH_SIZE )
142-
143- if ( batch . length === 0 ) {
144- hasMore = false
145- break
146- }
65+ . limit ( limit ) ,
66+ onBatch : async ( rows ) => {
67+ for ( const row of rows ) await deleteExecutionFiles ( row . files , fileStats )
68+ } ,
69+ } )
14770
148- const logIds = batch . map ( ( log ) => log . id )
149- try {
150- const deleted = await db
151- . delete ( jobExecutionLogs )
152- . where ( inArray ( jobExecutionLogs . id , logIds ) )
153- . returning ( { id : jobExecutionLogs . id } )
154-
155- results . deleted += deleted . length
156- } catch ( deleteError ) {
157- results . deleteFailed += logIds . length
158- logger . error ( `Batch delete failed for ${ label } (job_execution_logs):` , { deleteError } )
159- }
160-
161- batchesProcessed ++
162- hasMore = batch . length === BATCH_SIZE
163-
164- logger . info (
165- `[${ label } ] job_execution_logs batch ${ batchesProcessed } : ${ batch . length } rows processed`
166- )
167- }
168-
169- return results
71+ return { ...dbStats , ...fileStats }
17072}
17173
17274export async function runCleanupLogs ( payload : CleanupJobPayload ) : Promise < void > {
@@ -190,15 +92,19 @@ export async function runCleanupLogs(payload: CleanupJobPayload): Promise<void>
19092 `[${ label } ] Cleaning ${ workspaceIds . length } workspaces, cutoff: ${ retentionDate . toISOString ( ) } `
19193 )
19294
193- const results = await cleanupTier ( workspaceIds , retentionDate , label )
95+ const workflowResults = await cleanupWorkflowExecutionLogs ( workspaceIds , retentionDate , label )
19496 logger . info (
195- `[${ label } ] workflow_execution_logs: ${ results . deleted } deleted, ${ results . deleteFailed } failed out of ${ results . total } candidates `
97+ `[${ label } ] workflow_execution_logs files : ${ workflowResults . filesDeleted } / ${ workflowResults . filesTotal } deleted, ${ workflowResults . filesDeleteFailed } failed `
19698 )
19799
198- const jobLogResults = await cleanupJobExecutionLogsTier ( workspaceIds , retentionDate , label )
199- logger . info (
200- `[${ label } ] job_execution_logs: ${ jobLogResults . deleted } deleted, ${ jobLogResults . deleteFailed } failed`
201- )
100+ await batchDeleteByWorkspaceAndTimestamp ( {
101+ tableDef : jobExecutionLogs ,
102+ workspaceIdCol : jobExecutionLogs . workspaceId ,
103+ timestampCol : jobExecutionLogs . startedAt ,
104+ workspaceIds,
105+ retentionDate,
106+ tableName : `${ label } /job_execution_logs` ,
107+ } )
202108
203109 // Snapshot cleanup runs only on the free job to avoid running it N times for N enterprise workspaces.
204110 if ( payload . plan === 'free' ) {
0 commit comments