此文章收集平时工作中一些Pythonic code,以供后面参考。
def get_file_content(fpath):
    """Return the text of ``fpath``, trying several encodings in turn.

    The file is decoded with each candidate encoding in order and the
    first successful result is returned.

    Args:
        fpath: Path of the file to read.

    Returns:
        The decoded file content as ``str``.

    Raises:
        UnicodeDecodeError: If even the last candidate encoding fails.
        FileNotFoundError: If ``fpath`` does not exist.
    """
    G_ENCODING_LIST = ["utf-8", "gbk", "latin1"]
    for encode in G_ENCODING_LIST:
        try:
            # The context manager closes the handle even when decoding
            # fails, so a failed attempt does not leak a file descriptor.
            with open(fpath, encoding=encode) as fp:
                return fp.read()
        except UnicodeDecodeError:
            # Only give up once the last candidate has failed as well.
            if encode == G_ENCODING_LIST[-1]:
                raise
def get_file_content(fpath, encodings=("utf-8", "gbk", "latin1")):
    """Return the text of ``fpath``, trying several encodings in turn.

    A flat ``for`` loop replaces nested ``try``/``except`` blocks: each
    candidate encoding is tried in order and the first successful decode
    is returned.

    Args:
        fpath: Path of the file to read.
        encodings: Candidate encodings, tried in order. With the default
            candidates the failure path is not reachable in practice,
            because ``latin1`` can decode any byte sequence.

    Returns:
        The decoded file content as ``str``.

    Raises:
        UnicodeDecodeError: If no candidate encoding can decode the file;
            the error of the last attempt is re-raised.
        ValueError: If ``encodings`` is empty.
        FileNotFoundError: If ``fpath`` does not exist.
    """
    last_error = None
    for encode in encodings:
        try:
            # The context manager closes the handle even when decoding
            # fails, so a failed attempt does not leak a file descriptor.
            with open(fpath, encoding=encode) as fp:
                return fp.read()
        except UnicodeDecodeError as err:
            # Remember the failure and fall through to the next candidate.
            last_error = err
    if last_error is None:
        raise ValueError("encodings must not be empty")
    # A bare ``raise UnicodeDecodeError`` would itself fail with TypeError
    # (the constructor needs five arguments), so re-raise the real error.
    raise last_error
为了避免过深的try except嵌套,这里使用了for循环使代码块更加扁平
def add_patterns(self, ptn_docs):
    """Add pattern set info.

    For every ``(ltype, doc)`` pair the document is tokenised with
    ``jieba.cut``, de-duplicated, stripped of ``G_STOP_WORDS`` and then
    registered in ``self._ptn_simtest_dbs[ltype]``.

    Args:
        ptn_docs: Iterable of ``(ltype, doc)`` pairs.
    """
    ltypes = []
    for ltype, doc in ptn_docs:
        ltypes.append(ltype)
        # Build the list without a list comprehension. The document is
        # tokenised exactly once here; building the list a second time
        # would only repeat the same work.
        doc_list = []
        for word in jieba.cut(doc):
            doc_list.append(word)
        doc_list = list(set(doc_list) - set(G_STOP_WORDS))
        self._ptn_simtest_dbs[ltype]["all_doc_list"].append(doc_list)
        self._ptn_simtest_dbs[ltype]["dict"].add_documents([doc_list])
    # ...... (the remainder of the method is elided in the original article)
def add_patterns(self, ptn_docs):
    """Add pattern set info.

    For every ``(ltype, doc)`` pair the document is tokenised with
    ``jieba.cut``, de-duplicated, stripped of ``G_STOP_WORDS`` and then
    registered in ``self._ptn_simtest_dbs[ltype]``.

    Args:
        ptn_docs: Iterable of ``(ltype, doc)`` pairs.
    """
    ltypes = []
    for ltype, doc in ptn_docs:
        ltypes.append(ltype)
        # Build the list with a list comprehension.
        doc_list = [word for word in jieba.cut(doc)]
        # Going through sets drops duplicate words and removes stop words;
        # the resulting order is therefore not the original word order.
        doc_list = list(set(doc_list) - set(G_STOP_WORDS))
        self._ptn_simtest_dbs[ltype]["all_doc_list"].append(doc_list)
        self._ptn_simtest_dbs[ltype]["dict"].add_documents([doc_list])
    # NOTE(review): the rest of the method is elided in the article.
    ......
doc_list = list(set(doc_list) - set(G_STOP_WORDS))
布尔值判断
根据判断对象结果返回True or False,可以通过以下方法简写:
def _check_fingerprint(self, suspect):
    """Check whether fingerprint exist.

    Computes the MD5 digest of the file at ``suspect`` and queries
    ``self.ws_data.filter(fingerprint=<digest>)``.

    Args:
        suspect: Path of the file whose fingerprint is looked up.

    Returns:
        ``True`` if the filter result is truthy, ``False`` otherwise.
    """
    # Read inside a context manager so the handle is always closed.
    with open(suspect, "rb") as fp:
        content = fp.read()
    md5sum = hashlib.md5(content).hexdigest()
    wsp = self.ws_data.filter(fingerprint=md5sum)
    return True if wsp else False
def _check_fingerprint(self, suspect):
    """Check whether fingerprint exist.

    Computes the MD5 digest of the file at ``suspect`` and queries
    ``self.ws_data.filter(fingerprint=<digest>)``.

    Args:
        suspect: Path of the file whose fingerprint is looked up.

    Returns:
        ``bool`` of the filter result: ``True`` when it is truthy.
    """
    # Read inside a context manager so the handle is always closed.
    with open(suspect, "rb") as fp:
        content = fp.read()
    md5sum = hashlib.md5(content).hexdigest()
    wsp = self.ws_data.filter(fingerprint=md5sum)
    # bool() states the intent directly instead of "True if x else False".
    return bool(wsp)


# goto in Python
def check_pattern_package(fpath):
    """Check pattern package correctness.

    The package is a zip archive whose first entry must be a directory
    containing ``md5sum.txt``. The archive is extracted next to ``fpath``
    and every ``<md5> <relative path>`` row of ``md5sum.txt`` is verified
    against the extracted file.

    Args:
        fpath: Path of the zip package to check.

    Returns:
        A ``(ok, reason)`` tuple. ``(True, None)`` when every checksum
        matches. ``(False, REST_ERR_400_ZIP_BADFILE)`` when the archive is
        empty, its first entry is not a directory, or a checksum differs.
        ``(False, REST_ERR_400_ZIP_BADFORMAT)`` when ``md5sum.txt`` is
        missing or one of its rows has fewer than two fields.
    """
    base_dir = os.path.dirname(fpath)
    with zipfile.ZipFile(fpath) as zf:
        infolist = zf.infolist()
        # An empty archive has no first entry to inspect.
        if not infolist or not infolist[0].is_dir():
            return False, REST_ERR_400_ZIP_BADFILE
        zf_base_dir = infolist[0].filename
        md5sum_file = os.path.join(zf_base_dir, "md5sum.txt")
        if md5sum_file not in zf.namelist():
            return False, REST_ERR_400_ZIP_BADFORMAT
        zf.extractall(base_dir)
    extract_dir = os.path.join(base_dir, zf_base_dir)
    try:
        with open(os.path.join(base_dir, md5sum_file), newline="") as md5_fp:
            # csv.reader is lazy: materialise the rows while the file is
            # still open, otherwise iterating later hits a closed file.
            rows = list(csv.reader(md5_fp, delimiter=" "))
    except FileNotFoundError:
        return False, REST_ERR_400_ZIP_BADFORMAT
    for row in rows:
        if len(row) < 2:
            return False, REST_ERR_400_ZIP_BADFORMAT
        pzf = os.path.join(extract_dir, row[1])
        with open(pzf, "rb") as fpzf:
            fdata = fpzf.read()
        md5sum = hashlib.md5(fdata).hexdigest()
        if md5sum != row[0]:
            return False, REST_ERR_400_ZIP_BADFILE
    return True, None
使用“try/except”控制代码执行路径模拟“goto”:
class PtnPackageParseError(Exception):
    """Exception for pattern package parse.

    Attributes:
        reason: Error code describing why the package was rejected.
        message: Optional human-readable detail.
    """

    def __init__(self, reason, message=""):
        self.reason = reason
        self.message = message
        # Pass something to the base class so str(exc) is not empty.
        super().__init__(message or reason)


def check_pattern_package(fpath, cleanup=False):
    """Check pattern package correctness.

    The package is a zip archive whose first entry must be a directory
    containing ``md5sum.txt``. The archive is extracted next to ``fpath``
    and every ``<md5> <relative path>`` row of ``md5sum.txt`` is verified
    against the extracted file. Every validation failure raises
    ``PtnPackageParseError`` internally, so all of them leave through the
    same ``except``/``finally`` exit (the "goto" being emulated).

    Args:
        fpath: Path of the zip package to check.
        cleanup: When true, delete ``fpath`` and the extracted directory
            before returning, whatever the outcome.

    Returns:
        A ``(ret, reason, extract_dir)`` tuple. ``ret`` is ``True`` and
        ``reason`` is ``None`` on success; on failure ``ret`` is ``False``
        and ``reason`` is ``REST_ERR_400_ZIP_BADFILE`` or
        ``REST_ERR_400_ZIP_BADFORMAT``. ``extract_dir`` is the extraction
        directory, or ``None`` if nothing was extracted.
    """
    import shutil  # local import: only the cleanup path needs it

    base_dir = os.path.dirname(fpath)
    ret, reason, extract_dir = True, None, None
    try:
        with zipfile.ZipFile(fpath) as zf:
            infolist = zf.infolist()
            # An empty archive has no first entry to inspect.
            if not infolist or not infolist[0].is_dir():
                raise PtnPackageParseError(REST_ERR_400_ZIP_BADFILE)
            zf_base_dir = infolist[0].filename
            md5sum_file = os.path.join(zf_base_dir, "md5sum.txt")
            if md5sum_file not in zf.namelist():
                raise PtnPackageParseError(REST_ERR_400_ZIP_BADFORMAT)
            zf.extractall(base_dir)
            extract_dir = os.path.join(base_dir, zf_base_dir)
        try:
            md5sum_path = os.path.join(base_dir, md5sum_file)
            with open(md5sum_path, newline="") as md5_fp:
                # csv.reader is lazy: materialise the rows while the file
                # is still open, otherwise iterating hits a closed file.
                rows = list(csv.reader(md5_fp, delimiter=" "))
        except FileNotFoundError:
            raise PtnPackageParseError(REST_ERR_400_ZIP_BADFORMAT)
        for row in rows:
            if len(row) < 2:
                raise PtnPackageParseError(REST_ERR_400_ZIP_BADFORMAT)
            pzf = os.path.join(extract_dir, row[1])
            with open(pzf, "rb") as fpzf:
                fdata = fpzf.read()
            md5sum = hashlib.md5(fdata).hexdigest()
            if md5sum != row[0]:
                raise PtnPackageParseError(REST_ERR_400_ZIP_BADFILE)
    except PtnPackageParseError as e:
        ret, reason = False, e.reason
    finally:
        if cleanup:
            os.unlink(fpath)
            # extract_dir is still None when validation failed before
            # extraction. The extracted tree is not empty, so it has to be
            # removed recursively.
            if extract_dir and os.path.isdir(extract_dir):
                shutil.rmtree(extract_dir)
    return ret, reason, extract_dir
原文出处:github: jasonTu/python-material-collection
摘要:温习统计学的知识为更深层次的学习做准备在的演讲中说就是我们理解但不知道另外的是如何的我在台下想对于那可以理解的我好像都只懂了参考标准高效的流程课程用的是我不想再学一门类似的语言了我会找出相对应的和的来源流程什么是干净的一个变 Why The Data Science Specialization 温习统计学的知识, 为更深层次的学习做准备 Andrew Ng 在 2015 GTC ...
摘要:整理可爱的简化元编程的是什么鬼,多线程性能究竟如何浅谈的语句基础深入理解中的赋值引用拷贝作用域命名空间和作用域窥探引用计数编写漂亮的,可读的代码的最佳时间语言中计数方法的演变描述符解密的内存管理机制深入的内存管理源码剖析 Python 整理 Python3 Official Documentation Python3 Document Coding Style PEP 8 Encodi...
摘要: Caching Libraries for caching data. Beaker - A library for caching and sessions for use with web applications and stand-alone Python scripts and applications. dogpile.cache - dogpile.cache...
摘要:我希望的是类似中文字符这样的使用体验。中文中文这里的遍历就类似中文中文我们这里干的事情类似于里两种类型的区分。 致力于在现代C++中提供Python的编程体验。这个建立在两个关键的基础上 c++ 11/14/17 提供了从 auto 到 structure binding 的语法便利 https://ericniebler.github.io... 提供的 range 抽象 但是 ...
摘要:最终还是要写上足够量的代码,才会有悟道的那一刻。另外,对于代码本身,有一套书写规范,叫做。 Python 里有个小彩蛋: 在 Python Shell 里输入 import this showImg(https://segmentfault.com/img/remote/1460000018394156?w=600&h=463); 这段话被称作 Python 之禅 ( The Zen ...
阅读 1837·2023-04-26 01:44
阅读 1239·2021-11-12 10:34
阅读 1617·2021-09-09 09:33
阅读 1746·2019-08-30 15:44
阅读 2908·2019-08-30 13:49
阅读 2203·2019-08-29 15:26
阅读 955·2019-08-26 13:30
阅读 1428·2019-08-23 18:15