Init V4 community edition (#2265)

* Init V4 community edition * Init V4 community edition
2025-04-20 17:31:25 +08:00
parent da4e44b77a
commit 21d158db07
597 changed files with 119415 additions and 41692 deletions
--- a/pkg/queue/metric.go
+++ b/pkg/queue/metric.go
@@ -0,0 +1,79 @@
+package queue
+
+import "sync/atomic"
+
+// Metric interface
+type Metric interface {
+	IncBusyWorker()
+	DecBusyWorker()
+	BusyWorkers() uint64
+	SuccessTasks() uint64
+	FailureTasks() uint64
+	SubmittedTasks() uint64
+	IncSuccessTask()
+	IncFailureTask()
+	IncSubmittedTask()
+}
+
+var _ Metric = (*metric)(nil)
+
+type metric struct {
+	busyWorkers     uint64
+	successTasks    uint64
+	failureTasks    uint64
+	submittedTasks  uint64
+	suspendingTasks uint64
+}
+
+// NewMetric for default metric structure
+func NewMetric() Metric {
+	return &metric{}
+}
+
+func (m *metric) IncBusyWorker() {
+	atomic.AddUint64(&m.busyWorkers, 1)
+}
+
+func (m *metric) DecBusyWorker() {
+	atomic.AddUint64(&m.busyWorkers, ^uint64(0))
+}
+
+func (m *metric) BusyWorkers() uint64 {
+	return atomic.LoadUint64(&m.busyWorkers)
+}
+
+func (m *metric) IncSuccessTask() {
+	atomic.AddUint64(&m.successTasks, 1)
+}
+
+func (m *metric) IncFailureTask() {
+	atomic.AddUint64(&m.failureTasks, 1)
+}
+
+func (m *metric) IncSubmittedTask() {
+	atomic.AddUint64(&m.submittedTasks, 1)
+}
+
+func (m *metric) SuccessTasks() uint64 {
+	return atomic.LoadUint64(&m.successTasks)
+}
+
+func (m *metric) FailureTasks() uint64 {
+	return atomic.LoadUint64(&m.failureTasks)
+}
+
+func (m *metric) SubmittedTasks() uint64 {
+	return atomic.LoadUint64(&m.submittedTasks)
+}
+
+func (m *metric) SuspendingTasks() uint64 {
+	return atomic.LoadUint64(&m.suspendingTasks)
+}
+
+func (m *metric) IncSuspendingTask() {
+	atomic.AddUint64(&m.suspendingTasks, 1)
+}
+
+func (m *metric) DecSuspendingTask() {
+	atomic.AddUint64(&m.suspendingTasks, ^uint64(0))
+}
--- a/pkg/queue/options.go
+++ b/pkg/queue/options.go
@@ -0,0 +1,109 @@
+package queue
+
+import (
+	"runtime"
+	"time"
+)
+
+// An Option configures a mutex.
+type Option interface {
+	apply(*options)
+}
+
+// OptionFunc is a function that configures a queue.
+type OptionFunc func(*options)
+
+// Apply calls f(option)
+func (f OptionFunc) apply(option *options) {
+	f(option)
+}
+
+type options struct {
+	maxTaskExecution   time.Duration // Maximum execution time for a Task.
+	retryDelay         time.Duration
+	taskPullInterval   time.Duration
+	backoffFactor      float64
+	backoffMaxDuration time.Duration
+	maxRetry           int
+	resumeTaskType     []string
+	workerCount        int
+	name               string
+}
+
+func newDefaultOptions() *options {
+	return &options{
+		workerCount:        runtime.NumCPU(),
+		maxTaskExecution:   60 * time.Hour,
+		backoffFactor:      2,
+		backoffMaxDuration: 60 * time.Second,
+		resumeTaskType:     []string{},
+		taskPullInterval:   1 * time.Second,
+		name:               "default",
+	}
+}
+
+// WithMaxTaskExecution set maximum execution time for a Task.
+func WithMaxTaskExecution(d time.Duration) Option {
+	return OptionFunc(func(q *options) {
+		q.maxTaskExecution = d
+	})
+}
+
+// WithRetryDelay set retry delay
+func WithRetryDelay(d time.Duration) Option {
+	return OptionFunc(func(q *options) {
+		q.retryDelay = d
+	})
+}
+
+// WithBackoffFactor set backoff factor
+func WithBackoffFactor(f float64) Option {
+	return OptionFunc(func(q *options) {
+		q.backoffFactor = f
+	})
+}
+
+// WithBackoffMaxDuration set backoff max duration
+func WithBackoffMaxDuration(d time.Duration) Option {
+	return OptionFunc(func(q *options) {
+		q.backoffMaxDuration = d
+	})
+}
+
+// WithMaxRetry set max retry
+func WithMaxRetry(n int) Option {
+	return OptionFunc(func(q *options) {
+		q.maxRetry = n
+	})
+}
+
+// WithResumeTaskType set resume Task type
+func WithResumeTaskType(types ...string) Option {
+	return OptionFunc(func(q *options) {
+		q.resumeTaskType = types
+	})
+}
+
+// WithWorkerCount set worker count
+func WithWorkerCount(num int) Option {
+	return OptionFunc(func(q *options) {
+		if num <= 0 {
+			num = runtime.NumCPU()
+		}
+		q.workerCount = num
+	})
+}
+
+// WithName set queue name
+func WithName(name string) Option {
+	return OptionFunc(func(q *options) {
+		q.name = name
+	})
+}
+
+// WithTaskPullInterval set task pull interval
+func WithTaskPullInterval(d time.Duration) Option {
+	return OptionFunc(func(q *options) {
+		q.taskPullInterval = d
+	})
+}
--- a/pkg/queue/queue.go
+++ b/pkg/queue/queue.go
@@ -0,0 +1,437 @@
+package queue
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/cloudreve/Cloudreve/v4/ent/task"
+	"github.com/cloudreve/Cloudreve/v4/inventory"
+	"github.com/cloudreve/Cloudreve/v4/pkg/logging"
+	"github.com/jpillora/backoff"
+)
+
+type (
+	Queue interface {
+		// Start resume tasks and starts all workers.
+		Start()
+		// Shutdown stops all workers.
+		Shutdown()
+		// SubmitTask submits a Task to the queue.
+		QueueTask(ctx context.Context, t Task) error
+		// BusyWorkers returns the numbers of workers in the running process.
+		BusyWorkers() int
+		// BusyWorkers returns the numbers of success tasks.
+		SuccessTasks() int
+		// FailureTasks returns the numbers of failure tasks.
+		FailureTasks() int
+		// SubmittedTasks returns the numbers of submitted tasks.
+		SubmittedTasks() int
+		// SuspendingTasks returns the numbers of suspending tasks.
+		SuspendingTasks() int
+	}
+	queue struct {
+		sync.Mutex
+		routineGroup *routineGroup
+		metric       *metric
+		quit         chan struct{}
+		ready        chan struct{}
+		scheduler    Scheduler
+		stopOnce     sync.Once
+		stopFlag     int32
+		rootCtx      context.Context
+		cancel       context.CancelFunc
+
+		// Dependencies
+		logger     logging.Logger
+		taskClient inventory.TaskClient
+		dep        Dep
+		registry   TaskRegistry
+
+		// Options
+		*options
+	}
+
+	Dep interface {
+		ForkWithLogger(ctx context.Context, l logging.Logger) context.Context
+	}
+)
+
+var (
+	CriticalErr = errors.New("non-retryable error")
+)
+
+func New(l logging.Logger, taskClient inventory.TaskClient, registry TaskRegistry, dep Dep, opts ...Option) Queue {
+	o := newDefaultOptions()
+	for _, opt := range opts {
+		opt.apply(o)
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+
+	return &queue{
+		routineGroup: newRoutineGroup(),
+		scheduler:    NewFifoScheduler(0, l),
+		quit:         make(chan struct{}),
+		ready:        make(chan struct{}, 1),
+		metric:       &metric{},
+		options:      o,
+		logger:       l,
+		registry:     registry,
+		taskClient:   taskClient,
+		dep:          dep,
+		rootCtx:      ctx,
+		cancel:       cancel,
+	}
+}
+
+// Start to enable all worker
+func (q *queue) Start() {
+	q.routineGroup.Run(func() {
+		// Resume tasks in DB
+		if len(q.options.resumeTaskType) > 0 && q.taskClient != nil {
+
+			ctx := context.TODO()
+			ctx = context.WithValue(ctx, inventory.LoadTaskUser{}, true)
+			ctx = context.WithValue(ctx, inventory.LoadUserGroup{}, true)
+			tasks, err := q.taskClient.GetPendingTasks(ctx, q.resumeTaskType...)
+			if err != nil {
+				q.logger.Warning("Failed to get pending tasks from DB for given type %v: %s", q.resumeTaskType, err)
+			}
+
+			resumed := 0
+			for _, t := range tasks {
+				resumedTask, err := NewTaskFromModel(t)
+				if err != nil {
+					q.logger.Warning("Failed to resume task %d: %s", t.ID, err)
+					continue
+				}
+
+				if resumedTask.Status() == task.StatusSuspending {
+					q.metric.IncSuspendingTask()
+					q.metric.IncSubmittedTask()
+				}
+
+				if err := q.QueueTask(ctx, resumedTask); err != nil {
+					q.logger.Warning("Failed to resume task %d: %s", t.ID, err)
+				}
+				resumed++
+			}
+
+			q.logger.Info("Resumed %d tasks from DB.", resumed)
+		}
+
+		q.start()
+	})
+	q.logger.Info("Queue %q started with %d workers.", q.name, q.workerCount)
+}
+
+// Shutdown stops all queues.
+func (q *queue) Shutdown() {
+	q.logger.Info("Shutting down queue %q...", q.name)
+	defer func() {
+		q.routineGroup.Wait()
+	}()
+
+	if !atomic.CompareAndSwapInt32(&q.stopFlag, 0, 1) {
+		return
+	}
+
+	q.stopOnce.Do(func() {
+		q.cancel()
+		if q.metric.BusyWorkers() > 0 {
+			q.logger.Info("shutdown all tasks in queue %q: %d workers", q.name, q.metric.BusyWorkers())
+		}
+
+		if err := q.scheduler.Shutdown(); err != nil {
+			q.logger.Error("failed to shutdown scheduler in queue %q: %w", q.name, err)
+		}
+		close(q.quit)
+	})
+
+}
+
+// BusyWorkers returns the numbers of workers in the running process.
+func (q *queue) BusyWorkers() int {
+	return int(q.metric.BusyWorkers())
+}
+
+// BusyWorkers returns the numbers of success tasks.
+func (q *queue) SuccessTasks() int {
+	return int(q.metric.SuccessTasks())
+}
+
+// BusyWorkers returns the numbers of failure tasks.
+func (q *queue) FailureTasks() int {
+	return int(q.metric.FailureTasks())
+}
+
+// BusyWorkers returns the numbers of submitted tasks.
+func (q *queue) SubmittedTasks() int {
+	return int(q.metric.SubmittedTasks())
+}
+
+// SuspendingTasks returns the numbers of suspending tasks.
+func (q *queue) SuspendingTasks() int {
+	return int(q.metric.SuspendingTasks())
+}
+
+// QueueTask to queue single Task
+func (q *queue) QueueTask(ctx context.Context, t Task) error {
+	if atomic.LoadInt32(&q.stopFlag) == 1 {
+		return ErrQueueShutdown
+	}
+
+	if t.Status() != task.StatusSuspending {
+		q.metric.IncSubmittedTask()
+		if err := q.transitStatus(ctx, t, task.StatusQueued); err != nil {
+			return err
+		}
+	}
+
+	if err := q.scheduler.Queue(t); err != nil {
+		return err
+	}
+	owner := ""
+	if t.Owner() != nil {
+		owner = t.Owner().Email
+	}
+	q.logger.Info("New Task with type %q submitted to queue %q by %q", t.Type(), q.name, owner)
+	if q.registry != nil {
+		q.registry.Set(t.ID(), t)
+	}
+
+	return nil
+}
+
+// newContext creates a new context for a new Task iteration.
+func (q *queue) newContext(t Task) context.Context {
+	l := q.logger.CopyWithPrefix(fmt.Sprintf("[Cid: %s TaskID: %d Queue: %s]", t.CorrelationID(), t.ID(), q.name))
+	ctx := q.dep.ForkWithLogger(q.rootCtx, l)
+	ctx = context.WithValue(ctx, logging.CorrelationIDCtx{}, t.CorrelationID())
+	ctx = context.WithValue(ctx, logging.LoggerCtx{}, l)
+	ctx = context.WithValue(ctx, inventory.UserCtx{}, t.Owner())
+	return ctx
+}
+
+func (q *queue) work(t Task) {
+	ctx := q.newContext(t)
+	l := logging.FromContext(ctx)
+	timeIterationStart := time.Now()
+
+	var err error
+	// to handle panic cases from inside the worker
+	// in such case, we start a new goroutine
+	defer func() {
+		q.metric.DecBusyWorker()
+		e := recover()
+		if e != nil {
+			l.Error("Panic error in queue %q: %v", q.name, e)
+			t.OnError(fmt.Errorf("panic error: %v", e), time.Since(timeIterationStart))
+
+			_ = q.transitStatus(ctx, t, task.StatusError)
+		}
+		q.schedule()
+	}()
+
+	err = q.transitStatus(ctx, t, task.StatusProcessing)
+	if err != nil {
+		l.Error("failed to transit task %d to processing: %s", t.ID(), err.Error())
+		panic(err)
+	}
+
+	for {
+		timeIterationStart = time.Now()
+		var next task.Status
+		next, err = q.run(ctx, t)
+		if err != nil {
+			t.OnError(err, time.Since(timeIterationStart))
+			l.Error("runtime error in queue %q: %s", q.name, err.Error())
+
+			_ = q.transitStatus(ctx, t, task.StatusError)
+			break
+		}
+
+		// iteration completes
+		t.OnIterationComplete(time.Since(timeIterationStart))
+		_ = q.transitStatus(ctx, t, next)
+		if next != task.StatusProcessing {
+			break
+		}
+	}
+}
+
+func (q *queue) run(ctx context.Context, t Task) (task.Status, error) {
+	l := logging.FromContext(ctx)
+
+	// create channel with buffer size 1 to avoid goroutine leak
+	done := make(chan struct {
+		err  error
+		next task.Status
+	}, 1)
+	panicChan := make(chan interface{}, 1)
+	startTime := time.Now()
+	ctx, cancel := context.WithTimeout(ctx, q.maxTaskExecution-t.Executed())
+	defer func() {
+		cancel()
+	}()
+
+	// run the job
+	go func() {
+		// handle panic issue
+		defer func() {
+			if p := recover(); p != nil {
+				panicChan <- p
+			}
+		}()
+
+		l.Debug("Iteration started.")
+		next, err := t.Do(ctx)
+		l.Debug("Iteration ended with err=%s", err)
+		if err != nil && q.maxRetry-t.Retried() > 0 && !errors.Is(err, CriticalErr) && atomic.LoadInt32(&q.stopFlag) != 1 {
+			// Retry needed
+			t.OnRetry(err)
+			b := &backoff.Backoff{
+				Max:    q.backoffMaxDuration,
+				Factor: q.backoffFactor,
+			}
+			delay := q.retryDelay
+			if q.retryDelay == 0 {
+				delay = b.ForAttempt(float64(t.Retried()))
+			}
+
+			// Resume after to retry
+			l.Info("Will be retried in %s", delay)
+			t.OnSuspend(time.Now().Add(delay).Unix())
+			err = nil
+			next = task.StatusSuspending
+		}
+
+		done <- struct {
+			err  error
+			next task.Status
+		}{err: err, next: next}
+	}()
+
+	select {
+	case p := <-panicChan:
+		panic(p)
+	case <-ctx.Done(): // timeout reached
+		return task.StatusError, ctx.Err()
+	case <-q.quit: // shutdown service
+		// cancel job
+		cancel()
+
+		leftTime := q.maxTaskExecution - t.Executed() - time.Since(startTime)
+		// wait job
+		select {
+		case <-time.After(leftTime):
+			return task.StatusError, context.DeadlineExceeded
+		case r := <-done: // job finish
+			return r.next, r.err
+		case p := <-panicChan:
+			panic(p)
+		}
+	case r := <-done: // job finish
+		return r.next, r.err
+	}
+}
+
+// beforeTaskStart updates Task status from queued to processing
+func (q *queue) transitStatus(ctx context.Context, task Task, to task.Status) (err error) {
+	old := task.Status()
+	transition, ok := stateTransitions[task.Status()][to]
+	if !ok {
+		err = fmt.Errorf("invalid state transition from %s to %s", old, to)
+	} else {
+		if innerErr := transition(ctx, task, to, q); innerErr != nil {
+			err = fmt.Errorf("failed to transit Task status from %s to %s: %w", old, to, innerErr)
+		}
+	}
+
+	l := logging.FromContext(ctx)
+	if err != nil {
+		l.Error(err.Error())
+	}
+
+	l.Info("Task %d status changed from %q to %q.", task.ID(), old, to)
+	return
+}
+
+// schedule to check worker number
+func (q *queue) schedule() {
+	q.Lock()
+	defer q.Unlock()
+	if q.BusyWorkers() >= q.workerCount {
+		return
+	}
+
+	select {
+	case q.ready <- struct{}{}:
+	default:
+	}
+}
+
+// start to start all worker
+func (q *queue) start() {
+	tasks := make(chan Task, 1)
+
+	for {
+		// check worker number
+		q.schedule()
+
+		select {
+		// wait worker ready
+		case <-q.ready:
+		case <-q.quit:
+			return
+		}
+
+		// request Task from queue in background
+		q.routineGroup.Run(func() {
+			for {
+				t, err := q.scheduler.Request()
+				if t == nil || err != nil {
+					if err != nil {
+						select {
+						case <-q.quit:
+							if !errors.Is(err, ErrNoTaskInQueue) {
+								close(tasks)
+								return
+							}
+						case <-time.After(q.taskPullInterval):
+							// sleep to fetch new Task
+						}
+					}
+				}
+				if t != nil {
+					tasks <- t
+					return
+				}
+
+				select {
+				case <-q.quit:
+					if !errors.Is(err, ErrNoTaskInQueue) {
+						close(tasks)
+						return
+					}
+				default:
+				}
+			}
+		})
+
+		t, ok := <-tasks
+		if !ok {
+			return
+		}
+
+		// start new Task
+		q.metric.IncBusyWorker()
+		q.routineGroup.Run(func() {
+			q.work(t)
+		})
+	}
+}
--- a/pkg/queue/registry.go
+++ b/pkg/queue/registry.go
@@ -0,0 +1,60 @@
+package queue
+
+import "sync"
+
+type (
+	// TaskRegistry is used in slave node to track in-memory stateful tasks.
+	TaskRegistry interface {
+		// NextID returns the next available Task ID.
+		NextID() int
+		// Get returns the Task by ID.
+		Get(id int) (Task, bool)
+		// Set sets the Task by ID.
+		Set(id int, t Task)
+		// Delete deletes the Task by ID.
+		Delete(id int)
+	}
+
+	taskRegistry struct {
+		tasks   map[int]Task
+		current int
+		mu      sync.Mutex
+	}
+)
+
+// NewTaskRegistry creates a new TaskRegistry.
+func NewTaskRegistry() TaskRegistry {
+	return &taskRegistry{
+		tasks: make(map[int]Task),
+	}
+}
+
+func (r *taskRegistry) NextID() int {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	r.current++
+	return r.current
+}
+
+func (r *taskRegistry) Get(id int) (Task, bool) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	t, ok := r.tasks[id]
+	return t, ok
+}
+
+func (r *taskRegistry) Set(id int, t Task) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	r.tasks[id] = t
+}
+
+func (r *taskRegistry) Delete(id int) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	delete(r.tasks, id)
+}
--- a/pkg/queue/scheduler.go
+++ b/pkg/queue/scheduler.go
@@ -0,0 +1,124 @@
+package queue
+
+import (
+	"errors"
+	"github.com/cloudreve/Cloudreve/v4/pkg/logging"
+	"sync"
+	"sync/atomic"
+	"time"
+)
+
+var (
+	// ErrQueueShutdown the queue is released and closed.
+	ErrQueueShutdown = errors.New("queue has been closed and released")
+	// ErrMaxCapacity Maximum size limit reached
+	ErrMaxCapacity = errors.New("golang-queue: maximum size limit reached")
+	// ErrNoTaskInQueue there is nothing in the queue
+	ErrNoTaskInQueue = errors.New("golang-queue: no Task in queue")
+)
+
+type (
+	Scheduler interface {
+		// Queue add a new Task into the queue
+		Queue(task Task) error
+		// Request get a new Task from the queue
+		Request() (Task, error)
+		// Shutdown stop all worker
+		Shutdown() error
+	}
+	fifoScheduler struct {
+		sync.Mutex
+		taskQueue taskHeap
+		capacity  int
+		count     int
+		exit      chan struct{}
+		logger    logging.Logger
+		stopOnce  sync.Once
+		stopFlag  int32
+	}
+	taskHeap []Task
+)
+
+// Queue send Task to the buffer channel
+func (s *fifoScheduler) Queue(task Task) error {
+	if atomic.LoadInt32(&s.stopFlag) == 1 {
+		return ErrQueueShutdown
+	}
+	if s.capacity > 0 && s.count >= s.capacity {
+		return ErrMaxCapacity
+	}
+
+	s.Lock()
+	s.taskQueue.Push(task)
+	s.count++
+	s.Unlock()
+
+	return nil
+}
+
+// Request a new Task from channel
+func (s *fifoScheduler) Request() (Task, error) {
+	if atomic.LoadInt32(&s.stopFlag) == 1 {
+		return nil, ErrQueueShutdown
+	}
+
+	if s.count == 0 {
+		return nil, ErrNoTaskInQueue
+	}
+	s.Lock()
+	if s.taskQueue[s.taskQueue.Len()-1].ResumeTime() > time.Now().Unix() {
+		s.Unlock()
+		return nil, ErrNoTaskInQueue
+	}
+
+	data := s.taskQueue.Pop()
+	s.count--
+	s.Unlock()
+
+	return data.(Task), nil
+}
+
+// Shutdown the worker
+func (s *fifoScheduler) Shutdown() error {
+	if !atomic.CompareAndSwapInt32(&s.stopFlag, 0, 1) {
+		return ErrQueueShutdown
+	}
+
+	return nil
+}
+
+// NewFifoScheduler for create new Scheduler instance
+func NewFifoScheduler(queueSize int, logger logging.Logger) Scheduler {
+	w := &fifoScheduler{
+		taskQueue: make([]Task, 2),
+		capacity:  queueSize,
+		logger:    logger,
+	}
+
+	return w
+}
+
+// Implement heap.Interface
+func (h taskHeap) Len() int {
+	return len(h)
+}
+
+func (h taskHeap) Less(i, j int) bool {
+	return h[i].ResumeTime() < h[j].ResumeTime()
+}
+
+func (h taskHeap) Swap(i, j int) {
+	h[i], h[j] = h[j], h[i]
+}
+
+func (h *taskHeap) Push(x any) {
+	*h = append(*h, x.(Task))
+}
+
+func (h *taskHeap) Pop() any {
+	old := *h
+	n := len(old)
+	x := old[n-1]
+	*h = old[0 : n-1]
+	return x
+}
--- a/pkg/queue/task.go
+++ b/pkg/queue/task.go
@@ -0,0 +1,526 @@
+package queue
+
+import (
+	"context"
+	"encoding/gob"
+	"errors"
+	"fmt"
+	"sync"
+	"time"
+
+	"github.com/cloudreve/Cloudreve/v4/ent"
+	"github.com/cloudreve/Cloudreve/v4/ent/task"
+	"github.com/cloudreve/Cloudreve/v4/inventory"
+	"github.com/cloudreve/Cloudreve/v4/inventory/types"
+	"github.com/cloudreve/Cloudreve/v4/pkg/hashid"
+	"github.com/cloudreve/Cloudreve/v4/pkg/logging"
+	"github.com/gofrs/uuid"
+	"github.com/samber/lo"
+)
+
+type (
+	Task interface {
+		Do(ctx context.Context) (task.Status, error)
+
+		// ID returns the Task ID
+		ID() int
+		// Type returns the Task type
+		Type() string
+		// Status returns the Task status
+		Status() task.Status
+		// Owner returns the Task owner
+		Owner() *ent.User
+		// State returns the internal Task state
+		State() string
+		// ShouldPersist returns true if the Task should be persisted into DB
+		ShouldPersist() bool
+		// Persisted returns true if the Task is persisted in DB
+		Persisted() bool
+		// Executed returns the duration of the Task execution
+		Executed() time.Duration
+		// Retried returns the number of times the Task has been retried
+		Retried() int
+		// Error returns the error of the Task
+		Error() error
+		// ErrorHistory returns the error history of the Task
+		ErrorHistory() []error
+		// Model returns the ent model of the Task
+		Model() *ent.Task
+		// CorrelationID returns the correlation ID of the Task
+		CorrelationID() uuid.UUID
+		// ResumeTime returns the time when the Task is resumed
+		ResumeTime() int64
+		// ResumeAfter sets the time when the Task should be resumed
+		ResumeAfter(next time.Duration)
+		Progress(ctx context.Context) Progresses
+		// Summarize returns the Task summary for UI display
+		Summarize(hasher hashid.Encoder) *Summary
+		// OnSuspend is called when queue decides to suspend the Task
+		OnSuspend(time int64)
+		// OnPersisted is called when the Task is persisted or updated in DB
+		OnPersisted(task *ent.Task)
+		// OnError is called when the Task encounters an error
+		OnError(err error, d time.Duration)
+		// OnRetry is called when the iteration returns error and before retry
+		OnRetry(err error)
+		// OnIterationComplete is called when the one iteration is completed
+		OnIterationComplete(executed time.Duration)
+		// OnStatusTransition is called when the Task status is changed
+		OnStatusTransition(newStatus task.Status)
+
+		// Cleanup is called when the Task is done or error.
+		Cleanup(ctx context.Context) error
+
+		Lock()
+		Unlock()
+	}
+	ResumableTaskFactory func(model *ent.Task) Task
+	Progress             struct {
+		Total      int64  `json:"total"`
+		Current    int64  `json:"current"`
+		Identifier string `json:"identifier"`
+	}
+	Progresses map[string]*Progress
+	Summary    struct {
+		NodeID int            `json:"-"`
+		Phase  string         `json:"phase,omitempty"`
+		Props  map[string]any `json:"props,omitempty"`
+	}
+
+	stateTransition func(ctx context.Context, task Task, newStatus task.Status, q *queue) error
+)
+
+var (
+	taskFactories sync.Map
+)
+
+const (
+	MediaMetaTaskType             = "media_meta"
+	EntityRecycleRoutineTaskType  = "entity_recycle_routine"
+	ExplicitEntityRecycleTaskType = "explicit_entity_recycle"
+	UploadSentinelCheckTaskType   = "upload_sentinel_check"
+	CreateArchiveTaskType         = "create_archive"
+	ExtractArchiveTaskType        = "extract_archive"
+	RelocateTaskType              = "relocate"
+	RemoteDownloadTaskType        = "remote_download"
+
+	SlaveCreateArchiveTaskType = "slave_create_archive"
+	SlaveUploadTaskType        = "slave_upload"
+	SlaveExtractArchiveType    = "slave_extract_archive"
+)
+
+func init() {
+	gob.Register(Progresses{})
+}
+
+// RegisterResumableTaskFactory registers a resumable Task factory
+func RegisterResumableTaskFactory(taskType string, factory ResumableTaskFactory) {
+	taskFactories.Store(taskType, factory)
+}
+
+// NewTaskFromModel creates a Task from ent.Task model
+func NewTaskFromModel(model *ent.Task) (Task, error) {
+	if factory, ok := taskFactories.Load(model.Type); ok {
+		return factory.(ResumableTaskFactory)(model), nil
+	}
+
+	return nil, fmt.Errorf("unknown Task type: %s", model.Type)
+}
+
+// InMemoryTask implements part Task interface using in-memory data.
+type InMemoryTask struct {
+	*DBTask
+}
+
+func (i *InMemoryTask) ShouldPersist() bool {
+	return false
+}
+
+func (t *InMemoryTask) OnStatusTransition(newStatus task.Status) {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	if t.Task != nil {
+		t.Task.Status = newStatus
+	}
+}
+
+// DBTask implements Task interface related to DB schema
+type DBTask struct {
+	DirectOwner *ent.User
+	Task        *ent.Task
+
+	mu sync.Mutex
+}
+
+func (t *DBTask) ID() int {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	if t.Task != nil {
+		return t.Task.ID
+	}
+	return 0
+}
+
+func (t *DBTask) Status() task.Status {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	if t.Task != nil {
+		return t.Task.Status
+	}
+	return ""
+}
+
+func (t *DBTask) Type() string {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+	return t.Task.Type
+}
+
+func (t *DBTask) Owner() *ent.User {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	if t.DirectOwner != nil {
+		return t.DirectOwner
+	}
+	if t.Task != nil {
+		return t.Task.Edges.User
+	}
+	return nil
+}
+
+func (t *DBTask) State() string {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	if t.Task != nil {
+		return t.Task.PrivateState
+	}
+	return ""
+}
+
+func (t *DBTask) Persisted() bool {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	return t.Task != nil && t.Task.ID != 0
+}
+
+func (t *DBTask) Executed() time.Duration {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	if t.Task != nil {
+		return t.Task.PublicState.ExecutedDuration
+	}
+	return 0
+}
+
+func (t *DBTask) Retried() int {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	if t.Task != nil {
+		return t.Task.PublicState.RetryCount
+	}
+	return 0
+}
+
+func (t *DBTask) Error() error {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	if t.Task != nil && t.Task.PublicState.Error != "" {
+		return errors.New(t.Task.PublicState.Error)
+	}
+
+	return nil
+}
+
+func (t *DBTask) ErrorHistory() []error {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	if t.Task != nil {
+		return lo.Map(t.Task.PublicState.ErrorHistory, func(err string, index int) error {
+			return errors.New(err)
+		})
+	}
+
+	return nil
+}
+
+func (t *DBTask) Model() *ent.Task {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+	return t.Task
+}
+
+func (t *DBTask) Cleanup(ctx context.Context) error {
+	return nil
+}
+
+func (t *DBTask) CorrelationID() uuid.UUID {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	if t.Task != nil {
+		return t.Task.CorrelationID
+	}
+	return uuid.Nil
+}
+
+func (t *DBTask) ShouldPersist() bool {
+	return true
+}
+
+func (t *DBTask) OnPersisted(task *ent.Task) {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	t.Task = task
+}
+
+func (t *DBTask) OnError(err error, d time.Duration) {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	if t.Task != nil {
+		t.Task.PublicState.Error = err.Error()
+		t.Task.PublicState.ExecutedDuration += d
+	}
+}
+
+func (t *DBTask) OnRetry(err error) {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	if t.Task != nil {
+		if t.Task.PublicState.ErrorHistory == nil {
+			t.Task.PublicState.ErrorHistory = make([]string, 0)
+		}
+
+		t.Task.PublicState.ErrorHistory = append(t.Task.PublicState.ErrorHistory, err.Error())
+		t.Task.PublicState.RetryCount++
+	}
+}
+
+func (t *DBTask) OnIterationComplete(d time.Duration) {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	if t.Task != nil {
+		t.Task.PublicState.ExecutedDuration += d
+	}
+}
+
+func (t *DBTask) ResumeTime() int64 {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	if t.Task != nil {
+		return t.Task.PublicState.ResumeTime
+	}
+	return 0
+}
+
+func (t *DBTask) OnSuspend(time int64) {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	if t.Task != nil {
+		t.Task.PublicState.ResumeTime = time
+	}
+}
+
+func (t *DBTask) Progress(ctx context.Context) Progresses {
+	return nil
+}
+
+func (t *DBTask) OnStatusTransition(newStatus task.Status) {
+	// Nop
+}
+
+func (t *DBTask) Lock() {
+	t.mu.Lock()
+}
+
+func (t *DBTask) Unlock() {
+	t.mu.Unlock()
+}
+
+func (t *DBTask) Summarize(hasher hashid.Encoder) *Summary {
+	return &Summary{}
+}
+
+func (t *DBTask) ResumeAfter(next time.Duration) {
+	t.mu.Lock()
+	defer t.mu.Unlock()
+
+	if t.Task != nil {
+		t.Task.PublicState.ResumeTime = time.Now().Add(next).Unix()
+	}
+}
+
+var stateTransitions map[task.Status]map[task.Status]stateTransition
+
+func init() {
+	stateTransitions = map[task.Status]map[task.Status]stateTransition{
+		"": {
+			task.StatusQueued: persistTask,
+		},
+		task.StatusQueued: {
+			task.StatusProcessing: func(ctx context.Context, task Task, newStatus task.Status, q *queue) error {
+				if err := persistTask(ctx, task, newStatus, q); err != nil {
+					return err
+				}
+				return nil
+			},
+			task.StatusQueued: func(ctx context.Context, task Task, newStatus task.Status, q *queue) error {
+				return nil
+			},
+			task.StatusError: func(ctx context.Context, task Task, newStatus task.Status, q *queue) error {
+				q.metric.IncFailureTask()
+				return persistTask(ctx, task, newStatus, q)
+			},
+		},
+		task.StatusProcessing: {
+			task.StatusQueued: persistTask,
+			task.StatusCompleted: func(ctx context.Context, task Task, newStatus task.Status, q *queue) error {
+				q.logger.Info("Execution completed in %s with %d retries, clean up...", task.Executed(), task.Retried())
+				q.metric.IncSuccessTask()
+
+				if err := task.Cleanup(ctx); err != nil {
+					q.logger.Error("Task cleanup failed: %s", err.Error())
+				}
+
+				if q.registry != nil {
+					q.registry.Delete(task.ID())
+				}
+
+				if err := persistTask(ctx, task, newStatus, q); err != nil {
+					return err
+				}
+				return nil
+			},
+			task.StatusError: func(ctx context.Context, task Task, newStatus task.Status, q *queue) error {
+				q.logger.Error("Execution failed with error in %s with %d retries, clean up...", task.Executed(), task.Retried())
+				q.metric.IncFailureTask()
+
+				if err := task.Cleanup(ctx); err != nil {
+					q.logger.Error("Task cleanup failed: %s", err.Error())
+				}
+
+				if q.registry != nil {
+					q.registry.Delete(task.ID())
+				}
+
+				if err := persistTask(ctx, task, newStatus, q); err != nil {
+					return err
+				}
+
+				return nil
+			},
+			task.StatusCanceled: func(ctx context.Context, task Task, newStatus task.Status, q *queue) error {
+				q.logger.Info("Execution canceled, clean up...", task.Executed(), task.Retried())
+				q.metric.IncFailureTask()
+
+				if err := task.Cleanup(ctx); err != nil {
+					q.logger.Error("Task cleanup failed: %s", err.Error())
+				}
+
+				if q.registry != nil {
+					q.registry.Delete(task.ID())
+				}
+
+				if err := persistTask(ctx, task, newStatus, q); err != nil {
+					return err
+				}
+
+				return nil
+			},
+			task.StatusProcessing: persistTask,
+			task.StatusSuspending: func(ctx context.Context, task Task, newStatus task.Status, q *queue) error {
+				q.metric.IncSuspendingTask()
+				if err := persistTask(ctx, task, newStatus, q); err != nil {
+					return err
+				}
+				q.logger.Info("Task %d suspended, resume time: %d", task.ID(), task.ResumeTime())
+				return q.QueueTask(ctx, task)
+			},
+		},
+		task.StatusSuspending: {
+			task.StatusProcessing: func(ctx context.Context, task Task, newStatus task.Status, q *queue) error {
+				q.metric.DecSuspendingTask()
+				return persistTask(ctx, task, newStatus, q)
+			},
+			task.StatusError: func(ctx context.Context, task Task, newStatus task.Status, q *queue) error {
+				q.metric.IncFailureTask()
+				return persistTask(ctx, task, newStatus, q)
+			},
+		},
+	}
+
+}
+
+func persistTask(ctx context.Context, task Task, newState task.Status, q *queue) error {
+	// Persist Task into inventory
+	if task.ShouldPersist() {
+		if err := saveTaskToInventory(ctx, task, newState, q); err != nil {
+			return err
+		}
+	} else {
+		task.OnStatusTransition(newState)
+	}
+
+	return nil
+}
+
+func saveTaskToInventory(ctx context.Context, task Task, newStatus task.Status, q *queue) error {
+	var (
+		errStr     string
+		errHistory []string
+	)
+	if err := task.Error(); err != nil {
+		errStr = err.Error()
+	}
+
+	errHistory = lo.Map(task.ErrorHistory(), func(err error, index int) string {
+		return err.Error()
+	})
+
+	args := &inventory.TaskArgs{
+		Status: newStatus,
+		Type:   task.Type(),
+		PublicState: &types.TaskPublicState{
+			RetryCount:       task.Retried(),
+			ExecutedDuration: task.Executed(),
+			ErrorHistory:     errHistory,
+			Error:            errStr,
+			ResumeTime:       task.ResumeTime(),
+		},
+		PrivateState:  task.State(),
+		OwnerID:       task.Owner().ID,
+		CorrelationID: logging.CorrelationID(ctx),
+	}
+
+	var (
+		res *ent.Task
+		err error
+	)
+
+	if !task.Persisted() {
+		res, err = q.taskClient.New(ctx, args)
+	} else {
+		res, err = q.taskClient.Update(ctx, task.Model(), args)
+	}
+	if err != nil {
+		return fmt.Errorf("failed to persist Task into DB: %w", err)
+	}
+
+	task.OnPersisted(res)
+	return nil
+}
--- a/pkg/queue/thread.go
+++ b/pkg/queue/thread.go
@@ -0,0 +1,24 @@
+package queue
+
+import "sync"
+
+type routineGroup struct {
+	waitGroup sync.WaitGroup
+}
+
+func newRoutineGroup() *routineGroup {
+	return new(routineGroup)
+}
+
+func (g *routineGroup) Run(fn func()) {
+	g.waitGroup.Add(1)
+
+	go func() {
+		defer g.waitGroup.Done()
+		fn()
+	}()
+}
+
+func (g *routineGroup) Wait() {
+	g.waitGroup.Wait()
+}