# study nuitka module cache 本來在研究看能不能在 nuitka 找尋 module 的時候加一層cache 追到後面發現作者已經有寫了,這邊記錄一下 ```python= def makeOptimizationPass(): """Make a single pass for optimization, indication potential completion.""" finished = True ModuleRegistry.startTraversal() _restartProgress() main_module = None stdlib_phase_done = False while True: current_module = ModuleRegistry.nextModule() if current_module is None: if main_module is not None and pass_count == 1: considerUsedModules(module=main_module, pass_count=-1) stdlib_phase_done = True main_module = None continue break if current_module.isMainModule() and not stdlib_phase_done: main_module = current_module _traceProgressModuleStart(current_module) module_name = current_module.getFullName() with TimerReport( message="Optimizing %s" % module_name, decider=False ) as module_timer: changed, micro_passes = optimizeModule(current_module) ModuleRegistry.addModuleOptimizationTimeInformation( module_name=module_name, pass_number=pass_count, time_used=module_timer.getDelta(), micro_passes=micro_passes, merge_counts=fetchMergeCounts(), ) _traceProgressModuleEnd(current_module) if changed: finished = False # Unregister collection traces from now unused code, dropping the trace # collections of functions no longer used. This must be done after global # optimization due to cross module usages. 
for current_module in ModuleRegistry.getDoneModules(): if current_module.isCompiledPythonModule(): for unused_function in current_module.getUnusedFunctions(): Variables.updateVariablesFromCollection( old_collection=unused_function.trace_collection, new_collection=None, source_ref=unused_function.getSourceReference(), ) unused_function.trace_collection = None unused_function.finalize() current_module.subnode_functions = tuple( function for function in current_module.subnode_functions if function in current_module.getUsedFunctions() ) _endProgress() return finished def optimizeModules(output_filename): Graphs.startGraph() finished = makeOptimizationPass() # Demote compiled modules to bytecode, now that imports had a chance to be resolved, and # dependencies were handled. for module in ModuleRegistry.getDoneModules(): if ( module.isCompiledPythonModule() and module.getCompilationMode() == "bytecode" ): demoteCompiledModuleToBytecode(module) # Second, "endless" pass. while not finished: finished = makeOptimizationPass() Graphs.endGraph(output_filename) ``` 需要關注的是,這邊是我一開始想優化的地方optimizeModule ```python= with TimerReport( message="Optimizing %s" % module_name, decider=False ) as module_timer: changed, micro_passes = optimizeModule(current_module) ``` 這邊有三種module type ```python= def optimizeModule(module): # The tag set is global, so it can track changes without context. 
# pylint: disable=global-statement global tag_set tag_set = TagSet() addExtraSysPaths(Plugins.getModuleSysPathAdditions(module.getFullName())) if module.isPythonExtensionModule(): optimizeExtensionModule(module) return False, 0 elif module.isCompiledPythonModule(): return optimizeCompiledPythonModule(module) else: optimizeUncompiledPythonModule(module) return False, 0 ``` 其中attemptRecursion()可以看到裡面其實有實作 cache 機制 ```python= module.attemptRecursion() ``` 這邊會遞迴找尋依賴的 module ```python= def attemptRecursion(self): # Make sure the package is recursed to if any package_name = self.module_name.getPackageName() if package_name is None: return () # Return the list of newly added modules. package = getModuleByName(package_name) if package_name is not None and package is None: ( _package_name, package_filename, package_module_kind, finding, ) = locateModule( module_name=package_name, parent_package=None, level=0, ) # If we can't find the package for Python3.3 that is semi-OK, it might be in a # namespace package, these have no init code. if python_version >= 0x300 and not package_filename: return () if package_name == "uniconvertor.app.modules": return () assert package_filename is not None, (package_name, finding) assert _package_name == package_name, ( package_filename, _package_name, package_name, ) decision, _reason = decideRecursion( using_module_name=self.getFullName(), module_filename=package_filename, module_name=package_name, module_kind=package_module_kind, ) if decision is not None: package = recurseTo( module_name=package_name, module_filename=package_filename, module_kind=package_module_kind, source_ref=self.source_ref, reason="parent package", using_module_name=self.module_name, ) if package: from nuitka.ModuleRegistry import addUsedModule addUsedModule( package, using_module=self, usage_tag="package", reason="Containing package of '%s'." 
% self.getFullName(), source_ref=self.source_ref, ) ``` 這邊要關注recurseTo ```python= def recurseTo( module_name, module_filename, module_kind, source_ref, reason, using_module_name, ): try: module = ImportCache.getImportedModuleByNameAndPath( module_name, module_filename ) except KeyError: module = None if module is None: Plugins.onModuleRecursion( module_filename=module_filename, module_name=module_name, module_kind=module_kind, using_module_name=using_module_name, source_ref=source_ref, reason=reason, ) module = _recurseTo( module_name=module_name, module_filename=module_filename, module_kind=module_kind, reason=reason, ) return module ``` 這邊其實就是做cache 的地方了,假設之前沒遇過的module透過 遞迴找到的module 會存到ImportCache.addImportedModule(module) ```python= def _recurseTo(module_name, module_filename, module_kind, reason): from nuitka.tree import Building module = Building.buildModule( module_name=module_name, module_kind=module_kind, module_filename=module_filename, reason=reason, source_code=None, is_top=False, is_main=False, is_fake=False, hide_syntax_error=True, ) ImportCache.addImportedModule(module) return module ``` 而前一行建立 module 的function 其實可以在裡面看到 load module cache 的地方 ```python= module = Building.buildModule( module_name=module_name, module_kind=module_kind, module_filename=module_filename, reason=reason, source_code=None, is_top=False, is_main=False, is_fake=False, hide_syntax_error=True, ) ``` buildModule =>_createModule => ```python= def buildModule( module_name, module_kind, module_filename, reason, source_code, is_top, is_main, is_fake, hide_syntax_error, ): # Many details to deal with, # pylint: disable=too-many-branches,too-many-locals,too-many-statements ( main_added, is_package, is_namespace, source_ref, source_filename, ) = Importing.decideModuleSourceRef( filename=module_filename, module_name=module_name, is_main=is_main, is_fake=is_fake, logger=general, ) if hasPythonFlagPackageMode(): if is_top and shallMakeModule(): optimization_logger.warning( "Python flag -m 
(package_mode) has no effect in module mode, it's only for executables." ) elif is_main and not main_added: optimization_logger.warning( "Python flag -m (package_mode) only works on packages with '__main__.py'." ) # Handle bytecode module case immediately. if module_kind == "pyc": return makeUncompiledPythonModule( module_name=module_name, reason=reason, filename=module_filename, bytecode=loadCodeObjectData(module_filename), is_package=is_package, technical=module_name in detectEarlyImports(), ) # Read source code if necessary. Might give a SyntaxError due to not being proper # encoded source. if source_filename is not None and not is_namespace and module_kind == "py": # For fake modules, source is provided directly. original_source_code = None contributing_plugins = () if source_code is None: try: ( source_code, original_source_code, contributing_plugins, ) = readSourceCodeFromFilenameWithInformation( module_name=module_name, source_filename=source_filename ) except SyntaxError as e: # Avoid hiding our own syntax errors. if not hasattr(e, "generated_by_nuitka"): raise # Do not hide SyntaxError in main module. if not hide_syntax_error: raise return _makeModuleBodyFromSyntaxError( exc=e, module_name=module_name, reason=reason, module_filename=module_filename, ) try: with withNoSyntaxWarning(): ast_tree = parseSourceCodeToAst( source_code=source_code, module_name=module_name, filename=source_filename, line_offset=0, ) except (SyntaxError, IndentationError) as e: # Do not hide SyntaxError if asked not to. 
if not hide_syntax_error: raise if original_source_code is not None: try: parseSourceCodeToAst( source_code=original_source_code, module_name=module_name, filename=source_filename, line_offset=0, ) except (SyntaxError, IndentationError): # Also an exception without the plugins, that is OK pass else: source_diff = getSourceCodeDiff(original_source_code, source_code) for line in source_diff: plugins_logger.warning(line, keep_format=True) if len(contributing_plugins) == 1: next(iter(contributing_plugins)).sysexit( "Making changes to '%s' that cause SyntaxError '%s'" % (module_name, e) ) else: plugins_logger.sysexit( "One of the plugins '%s' is making changes to '%s' that cause SyntaxError '%s'" % (",".join(contributing_plugins), module_name, e) ) return _makeModuleBodyFromSyntaxError( exc=e, module_name=module_name, reason=reason, module_filename=module_filename, ) except CodeTooComplexCode: # Do not hide CodeTooComplexCode in main module. if is_main: raise return _makeModuleBodyTooComplex( module_name=module_name, reason=reason, module_filename=module_filename, source_code=source_code, is_package=is_package, ) else: ast_tree = None source_code = None module = _createModule( module_name=module_name, module_filename=None if is_fake else module_filename, module_kind=module_kind, reason=reason, source_code=source_code, source_ref=source_ref, is_top=is_top, is_main=is_main, is_namespace=is_namespace, is_package=is_package, main_added=main_added, ) if is_top: ModuleRegistry.addRootModule(module) OutputDirectories.setMainModule(module) if module.isCompiledPythonModule() and source_code is not None: try: createModuleTree( module=module, source_ref=source_ref, ast_tree=ast_tree, is_main=is_main, ) except CodeTooComplexCode: # Do not hide CodeTooComplexCode in main module. 
if is_main or is_top: raise return _makeModuleBodyTooComplex( module_name=module_name, reason=reason, module_filename=module_filename, source_code=source_code, is_package=is_package, ) return module ``` 其中可以看到 _loadUncompiledModuleFromCache這邊就是load cache 的核心 ```python= def _createModule( module_name, module_filename, module_kind, reason, source_code, source_ref, is_namespace, is_package, is_top, is_main, main_added, ): is_stdlib = module_filename is not None and isStandardLibraryPath(module_filename) if module_kind == "extension": result = PythonExtensionModule( module_name=module_name, module_filename=module_filename, reason=reason, technical=is_stdlib and module_name in detectEarlyImports(), source_ref=source_ref, ) elif is_main: assert reason == "main", reason result = PythonMainModule( main_added=main_added, module_name=module_name, mode=decideCompilationMode( is_top=is_top, module_name=module_name, module_filename=module_filename, for_pgo=False, ), future_spec=None, source_ref=source_ref, ) checkPythonVersionFromCode(source_code) elif is_namespace: result = createNamespacePackage( module_name=module_name, reason=reason, is_top=is_top, source_ref=source_ref, ) else: mode = decideCompilationMode( is_top=is_top, module_name=module_name, module_filename=module_filename, for_pgo=False, ) if ( mode == "bytecode" and not is_top and not shallDisableBytecodeCacheUsage() and hasCachedImportedModuleUsageAttempts( module_name=module_name, source_code=source_code, source_ref=source_ref ) ): result = _loadUncompiledModuleFromCache( module_name=module_name, reason=reason, is_package=is_package, source_code=source_code, source_ref=source_ref, ) # Not used anymore source_code = None else: if is_package: result = CompiledPythonPackage( module_name=module_name, reason=reason, is_top=is_top, mode=mode, future_spec=None, source_ref=source_ref, ) else: result = CompiledPythonModule( module_name=module_name, reason=reason, is_top=is_top, mode=mode, future_spec=None, 
source_ref=source_ref, ) return result ``` 這邊的 getCachedImportedModuleUsageAttempts 就是去撈存在json 的 module 的各個路徑 ```python= def _loadUncompiledModuleFromCache( module_name, reason, is_package, source_code, source_ref ): result = makeUncompiledPythonModule( module_name=module_name, reason=reason, filename=source_ref.getFilename(), bytecode=demoteSourceCodeToBytecode( module_name=module_name, source_code=source_code, filename=source_ref.getFilename(), ), technical=module_name in detectEarlyImports(), is_package=is_package, ) used_modules = OrderedSet() used_modules = getCachedImportedModuleUsageAttempts( module_name=module_name, source_code=source_code, source_ref=source_ref ) # assert not is_package, (module_name, used_modules, result, result.getCompileTimeFilename()) result.setUsedModules(used_modules) return result ``` ```python= def getCachedImportedModuleUsageAttempts(module_name, source_code, source_ref): cache_name = makeCacheName(module_name, source_code) cache_filename = _getCacheFilename(cache_name, "json") if not os.path.exists(cache_filename): return None data = loadJsonFromFilename(cache_filename) print("add test",cache_filename) if data is None: return None if data.get("file_format_version") != _cache_format_version: return None if data["module_name"] != module_name: return None result = OrderedSet() for module_used in data["modules_used"]: used_module_name = ModuleName(module_used["module_name"]) # Retry the module scan to see if it still gives same result if module_used["finding"] == "relative": _used_module_name, filename, module_kind, finding = locateModule( module_name=used_module_name.getBasename(), parent_package=used_module_name.getPackageName(), level=1, ) else: _used_module_name, filename, module_kind, finding = locateModule( module_name=used_module_name, parent_package=None, level=0 ) if ( finding != module_used["finding"] or module_kind != module_used["module_kind"] ): assert module_name != "email._header_value_parser", ( finding, 
module_used["finding"], ) return None result.add( makeModuleUsageAttempt( module_name=used_module_name, filename=filename, finding=module_used["finding"], module_kind=module_used["module_kind"], # TODO: Level might have to be dropped. level=0, # We store only the line number, so this cheats it to at full one. source_ref=source_ref.atLineNumber(module_used["source_ref_line"]), reason=module_used["reason"], ) ) for module_used in data["distribution_names"]: # TODO: Consider distributions found and not found and return None if # something changed there. pass return result ``` ``` add test /root/.cache/Nuitka/module-cache/functools@6f55d84b07fe21822d6580fa0cc48ba6@13d363d7c34fe331426e37bf7696c311.json add test /root/.cache/Nuitka/module-cache/importlib@6f55d84b07fe21822d6580fa0cc48ba6@9ba1a7efe39543a96efe13a91b57b8d1.json add test /root/.cache/Nuitka/module-cache/importlib@6f55d84b07fe21822d6580fa0cc48ba6@9ba1a7efe39543a96efe13a91b57b8d1.json add test /root/.cache/Nuitka/module-cache/inspect@99d5702d32117a583f47ffcdeef829eb@871d90d3ff28338d5df2c4752edc5013.json add test /root/.cache/Nuitka/module-cache/inspect@99d5702d32117a583f47ffcdeef829eb@871d90d3ff28338d5df2c4752edc5013.json add test /root/.cache/Nuitka/module-cache/logging@6f55d84b07fe21822d6580fa0cc48ba6@7426f300ca25f73ae2d58a8d12f79e98.json add test /root/.cache/Nuitka/module-cache/logging@6f55d84b07fe21822d6580fa0cc48ba6@7426f300ca25f73ae2d58a8d12f79e98.json add test /root/.cache/Nuitka/module-cache/numbers@6f55d84b07fe21822d6580fa0cc48ba6@05fb1bc183478279024344474aceb99d.json add test /root/.cache/Nuitka/module-cache/numbers@6f55d84b07fe21822d6580fa0cc48ba6@05fb1bc183478279024344474aceb99d.json add test /root/.cache/Nuitka/module-cache/re@6f55d84b07fe21822d6580fa0cc48ba6@1b88c6fb134b9d2d3415304d358eb9a3.json ``` 到這邊就大概知道實作cache這邊到底是怎麼完成的了 ![image](https://hackmd.io/_uploads/BybwcUbDkg.png)