call_queue.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. from functools import wraps
  2. import html
  3. import threading
  4. import time
  5. from modules import shared, progress, errors, devices
  # Single module-level lock shared by the wrappers in this file: whoever holds
  # it is the one call currently allowed to run.
  queue_lock = threading.Lock()


  def wrap_queued_call(func):
      """Return a wrapper that runs ``func`` while holding ``queue_lock``.

      Args:
          func: Callable to serialize behind the module-level lock.

      Returns:
          A callable that forwards all positional and keyword arguments to
          ``func`` and returns its result unchanged. The wrapper carries
          ``func``'s metadata (``__name__``, ``__doc__``, ``__wrapped__``).

      Note:
          ``threading.Lock`` is not reentrant, so a wrapped call that invokes
          another call guarded by ``queue_lock`` on the same thread will block.
      """
      @wraps(func)  # preserve metadata, consistent with the other wrappers in this module
      def f(*args, **kwargs):
          with queue_lock:
              return func(*args, **kwargs)

      return f
  def wrap_gradio_gpu_call(func, extra_outputs=None):
      """Wrap ``func`` so it runs under ``queue_lock`` with job/progress bookkeeping.

      The returned callable is additionally passed through ``wrap_gradio_call``
      with ``add_stats=True``, so exceptions raised by ``func`` are handled there.

      Args:
          func: Callable to run while holding ``queue_lock``.
          extra_outputs: Forwarded unchanged to ``wrap_gradio_call``.

      Returns:
          The callable produced by ``wrap_gradio_call`` around the locked wrapper.
      """
      @wraps(func)
      def f(*args, **kwargs):
          # if the first argument is a string that says "task(...)", it is treated as a job id
          first = args[0] if args else None
          if isinstance(first, str) and first.startswith("task(") and first.endswith(")"):
              id_task = first
              # Registered before taking the lock, so the task is known while it waits.
              progress.add_task_to_queue(id_task)
          else:
              id_task = None

          with queue_lock:
              shared.state.begin(job=id_task)
              progress.start_task(id_task)

              try:
                  res = func(*args, **kwargs)
                  progress.record_results(id_task, res)
              finally:
                  # Always mark the task finished, even when func raised.
                  progress.finish_task(id_task)

              # NOTE(review): not reached when func raises; in that case the
              # error handler in wrap_gradio_call resets shared.state.job and
              # shared.state.job_count instead - confirm this is intended.
              shared.state.end()

          return res

      return wrap_gradio_call(f, extra_outputs=extra_outputs, add_stats=True)
  def wrap_gradio_call(func, extra_outputs=None, add_stats=False):
      """Wrap ``func`` so failures become an HTML error output instead of raising.

      Args:
          func: Callable whose return value is an iterable of outputs; the last
              item is treated as an HTML string.
          extra_outputs: Outputs placed before the error HTML when ``func``
              raises. Defaults to ``[None, '']`` when ``None``.
          add_stats: When true, append elapsed time (and memory statistics, if
              the memory monitor is enabled) to the last output.

      Returns:
          A callable that returns a tuple of outputs. It accepts an
          ``extra_outputs_array`` keyword that overrides ``extra_outputs`` for a
          single call; every other argument is forwarded to ``func``.
      """
      @wraps(func)
      def f(*args, extra_outputs_array=extra_outputs, **kwargs):
          run_memmon = shared.opts.memmon_poll_rate > 0 and not shared.mem_mon.disabled and add_stats
          if run_memmon:
              shared.mem_mon.monitor()
          t = time.perf_counter()

          try:
              res = list(func(*args, **kwargs))
          except Exception as e:
              # When printing out our debug argument list,
              # do not print out more than 128 KiB (131072 characters) of text
              max_debug_str_len = 131072
              message = "Error completing request"
              arg_str = f"Arguments: {args} {kwargs}"
              # Measure before slicing; otherwise the truncation notice can never fire.
              full_len = len(arg_str)
              if full_len > max_debug_str_len:
                  arg_str = arg_str[:max_debug_str_len]
                  arg_str += f" (Argument list truncated at {max_debug_str_len}/{full_len} characters)"
              errors.report(f"{message}\n{arg_str}", exc_info=True)

              shared.state.job = ""
              shared.state.job_count = 0

              if extra_outputs_array is None:
                  extra_outputs_array = [None, '']

              error_message = f'{type(e).__name__}: {e}'
              # Copy into a list so a tuple of extra outputs does not raise
              # TypeError while we are already handling an error.
              res = list(extra_outputs_array) + [f"<div class='error'>{html.escape(error_message)}</div>"]

          devices.torch_gc()

          # Reset per-job flags whether the call succeeded or failed.
          shared.state.skipped = False
          shared.state.interrupted = False
          shared.state.job_count = 0

          if not add_stats:
              return tuple(res)

          elapsed = time.perf_counter() - t
          elapsed_m = int(elapsed // 60)
          elapsed_s = elapsed % 60
          elapsed_text = f"{elapsed_s:.1f} sec."
          if elapsed_m > 0:
              elapsed_text = f"{elapsed_m} min. "+elapsed_text

          if run_memmon:
              # -(v // -x) is ceiling division; presumably the monitor reports
              # bytes, making these values MiB - TODO confirm against mem_mon.
              mem_stats = {k: -(v//-(1024*1024)) for k, v in shared.mem_mon.stop().items()}
              active_peak = mem_stats['active_peak']
              reserved_peak = mem_stats['reserved_peak']
              sys_peak = mem_stats['system_peak']
              sys_total = mem_stats['total']
              # max(..., 1) guards against division by zero when the total is 0.
              sys_pct = sys_peak/max(sys_total, 1) * 100

              toltip_a = "Active: peak amount of video memory used during generation (excluding cached data)"
              toltip_r = "Reserved: total amout of video memory allocated by the Torch library "
              toltip_sys = "System: peak amout of video memory allocated by all running programs, out of total capacity"

              text_a = f"<abbr title='{toltip_a}'>A</abbr>: <span class='measurement'>{active_peak/1024:.2f} GB</span>"
              text_r = f"<abbr title='{toltip_r}'>R</abbr>: <span class='measurement'>{reserved_peak/1024:.2f} GB</span>"
              text_sys = f"<abbr title='{toltip_sys}'>Sys</abbr>: <span class='measurement'>{sys_peak/1024:.1f}/{sys_total/1024:g} GB</span> ({sys_pct:.1f}%)"

              vram_html = f"<p class='vram'>{text_a}, <wbr>{text_r}, <wbr>{text_sys}</p>"
          else:
              vram_html = ''

          # last item is always HTML
          res[-1] += f"<div class='performance'><p class='time'>Time taken: <wbr><span class='measurement'>{elapsed_text}</span></p>{vram_html}</div>"

          return tuple(res)

      return f