logger.py

# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
# pyre-unsafe

import atexit
import functools
import logging
import sys
import uuid
from typing import Any, Dict, Optional, Union

from hydra.utils import instantiate
from iopath.common.file_io import g_pathmgr
from numpy import ndarray
from sam3.train.utils.train_utils import get_machine_local_and_dist_rank, makedir
from torch import Tensor
from torch.utils.tensorboard import SummaryWriter

Scalar = Union[Tensor, ndarray, int, float]


def make_tensorboard_logger(log_dir: str, **writer_kwargs: Any):
    makedir(log_dir)
    summary_writer_method = SummaryWriter
    return TensorBoardLogger(
        path=log_dir, summary_writer_method=summary_writer_method, **writer_kwargs
    )
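
# Example (illustrative sketch, not part of the original module): constructing a
# TensorBoard logger through the factory above. The log directory is an
# arbitrary assumption; extra keyword arguments are forwarded to
# `SummaryWriter` (e.g. `flush_secs`).
#
#   tb_logger = make_tensorboard_logger("./tb_logs", flush_secs=30)
#   tb_logger.log("train/loss", 0.25, step=10)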


class TensorBoardWriterWrapper:
    """
    A wrapper around a SummaryWriter object.
    """

    def __init__(
        self,
        path: str,
        *args: Any,
        filename_suffix: Optional[str] = None,
        summary_writer_method: Any = SummaryWriter,
        **kwargs: Any,
    ) -> None:
        """Create a new TensorBoard logger.

        On construction, the logger creates a new events file that logs
        will be written to. If the environment variable `RANK` is defined,
        the logger only writes logs on rank 0.

        NOTE: If using the logger with distributed training:
        - This logger can call collective operations.
        - Logs will be written on rank 0 only.
        - The logger must be constructed synchronously *after* the distributed
          process group has been initialized.

        Args:
            path (str): path to write logs to
            filename_suffix (str, optional): suffix for the events file name;
                a random UUID is used if not provided
            summary_writer_method: class used to construct the writer
                (defaults to SummaryWriter)
            *args, **kwargs: extra arguments to pass to SummaryWriter
        """
        self._writer: Optional[SummaryWriter] = None
        _, self._rank = get_machine_local_and_dist_rank()
        self._path: str = path
        if self._rank == 0:
            logging.info(
                f"TensorBoard SummaryWriter instantiated. Files will be stored in: {path}"
            )
            self._writer = summary_writer_method(
                log_dir=path,
                *args,
                filename_suffix=filename_suffix or str(uuid.uuid4()),
                **kwargs,
            )
        else:
            logging.debug(
                f"Not logging meters on this host because env RANK: {self._rank} != 0"
            )
        atexit.register(self.close)

    @property
    def writer(self) -> Optional[SummaryWriter]:
        return self._writer

    @property
    def path(self) -> str:
        return self._path

    def flush(self) -> None:
        """Write pending logs to disk."""
        if not self._writer:
            return
        self._writer.flush()

    def close(self) -> None:
        """Close the writer, flushing pending logs to disk.

        Logs cannot be written after `close` is called.
        """
        if not self._writer:
            return
        self._writer.close()
        self._writer = None
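
# Example (illustrative sketch, not part of the original module): the wrapper can
# be subclassed to expose other SummaryWriter methods. `ImageLogger` and
# `log_image` below are hypothetical names; `add_image` is the standard
# SummaryWriter API.
#
#   class ImageLogger(TensorBoardWriterWrapper):
#       def log_image(self, tag: str, image, step: int) -> None:
#           if not self._writer:
#               return  # no-op on non-zero ranks
#           self._writer.add_image(tag, image, global_step=step)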


class TensorBoardLogger(TensorBoardWriterWrapper):
    """
    A simple logger for TensorBoard.
    """

    def log_dict(self, payload: Dict[str, Scalar], step: int) -> None:
        """Add multiple scalar values to TensorBoard.

        Args:
            payload (dict): dictionary of tag names and scalar values
            step (int): step value to record
        """
        if not self._writer:
            return
        for k, v in payload.items():
            self.log(k, v, step)

    def log(self, name: str, data: Scalar, step: int) -> None:
        """Add scalar data to TensorBoard.

        Args:
            name (str): tag name used to group scalars
            data (float/int/Tensor): scalar data to log
            step (int): step value to record
        """
        if not self._writer:
            return
        self._writer.add_scalar(name, data, global_step=step, new_style=True)

    def log_hparams(
        self, hparams: Dict[str, Scalar], meters: Dict[str, Scalar]
    ) -> None:
        """Add hyperparameter data to TensorBoard.

        Args:
            hparams (dict): dictionary of hyperparameter names and corresponding values
            meters (dict): dictionary of meter names and corresponding values
        """
        if not self._writer:
            return
        self._writer.add_hparams(hparams, meters)
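
# Example (illustrative; tag names and values are assumptions), continuing the
# construction example above: log a dictionary of scalars each step, then the
# hyperparameters alongside final metric values.
#
#   tb_logger.log_dict({"train/loss": 0.25, "train/lr": 1e-4}, step=100)
#   tb_logger.log_hparams(
#       {"batch_size": 32, "lr": 1e-4},
#       {"final/val_accuracy": 0.91},
#   )
#   tb_logger.flush()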


class Logger:
    """
    A logger class that can interface with multiple loggers. It currently
    supports TensorBoard only, for simplicity, but it can be extended with
    other loggers.
    """

    def __init__(self, logging_conf):
        # allow turning off TensorBoard with "should_log: false" in the config
        tb_config = logging_conf.tensorboard_writer
        tb_should_log = tb_config and tb_config.pop("should_log", True)
        self.tb_logger = instantiate(tb_config) if tb_should_log else None

    def log_dict(self, payload: Dict[str, Scalar], step: int) -> None:
        if self.tb_logger:
            self.tb_logger.log_dict(payload, step)

    def log(self, name: str, data: Scalar, step: int) -> None:
        if self.tb_logger:
            self.tb_logger.log(name, data, step)

    def log_hparams(
        self, hparams: Dict[str, Scalar], meters: Dict[str, Scalar]
    ) -> None:
        if self.tb_logger:
            self.tb_logger.log_hparams(hparams, meters)
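
# Example (illustrative sketch): `Logger` expects a config whose
# `tensorboard_writer` node is a Hydra-instantiable spec (`_target_` is the
# standard Hydra key). The surrounding `logging` key, the module path, and the
# `log_dir` value below are assumptions, not part of this module.
#
#   # YAML config:
#   #   logging:
#   #     tensorboard_writer:
#   #       _target_: <path.to.this.module>.make_tensorboard_logger
#   #       log_dir: ./tb_logs
#   #       should_log: true
#
#   logger = Logger(cfg.logging)
#   logger.log("train/loss", 0.25, step=10)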


# cache the opened file object, so that different calls to `setup_logging`
# with the same file name can safely write to the same file.
@functools.lru_cache(maxsize=None)
def _cached_log_stream(filename):
    # we tune the buffering value so that the logs are updated frequently
    log_buffer_kb = 10 * 1024  # 10 KiB buffer (value is in bytes)
    io = g_pathmgr.open(filename, mode="a", buffering=log_buffer_kb)
    atexit.register(io.close)
    return io


def setup_logging(
    name,
    output_dir=None,
    rank=0,
    log_level_primary="INFO",
    log_level_secondary="ERROR",
):
    """
    Set up the logging streams: a stdout handler and, on the master (rank 0)
    process only, a file handler.
    """
    # get the filename if we want to log to the file as well
    log_filename = None
    if output_dir:
        makedir(output_dir)
        if rank == 0:
            log_filename = f"{output_dir}/log.txt"

    logger = logging.getLogger(name)
    logger.setLevel(log_level_primary)

    # create formatter
    FORMAT = "%(levelname)s %(asctime)s %(filename)s:%(lineno)4d: %(message)s"
    formatter = logging.Formatter(FORMAT)

    # clean up any existing handlers
    for h in logger.handlers:
        logger.removeHandler(h)
    logger.root.handlers = []

    # set up the console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)
    if rank == 0:
        console_handler.setLevel(log_level_primary)
    else:
        console_handler.setLevel(log_level_secondary)

    # we log to a file as well if the user requested it (rank 0 only)
    if log_filename and rank == 0:
        file_handler = logging.StreamHandler(_cached_log_stream(log_filename))
        file_handler.setLevel(log_level_primary)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # replace the root logger so that module-level logging.* calls use these handlers
    logging.root = logger


def shutdown_logging():
    """
    After training is done, ensure that all of the logger streams are shut down.
    """
    logging.info("Shutting down loggers...")
    handlers = logging.root.handlers
    for handler in handlers:
        handler.close()
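

# Example (illustrative; the module name passed to `setup_logging` and the
# output directory are assumptions): typical lifecycle in a training script.
#
#   setup_logging(__name__, output_dir="./experiment_logs", rank=rank)
#   logging.info("training started")  # goes to stdout, and to log.txt on rank 0
#   ...
#   shutdown_logging()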