功能基础篇7——Python基础数据结构与算法标准库

编程入门行业动态更新时间:2024-10-11 07:32:18

功能<a href=https://www.elefans.com/category/jswz/34/1770030.html style= 基础篇7——Python基础数据结构与算法标准库"/>

功能基础篇7——Python基础数据结构与算法标准库

基础数据结构与算法

collections.abc

容器数据类型抽象基类，collections.abc为Python标准库

collections

容器数据类型，collections为Python标准库

命名元组

from collections import namedtupleCard = namedtuple('card', ['suits', 'number'])
c1 = Card('红桃', 2)
print(c1, c1.number, c1.suits)

from random import choice, shuffle
from collections import namedtuple_Card = namedtuple('Card', ['rank', 'suit'])class Poker:ranks = [str(n) for n in range(2, 11)] + list('JQKA')suits = ['红心', '方片', '梅花', '黑桃']def __init__(self):self._cards = [_Card(rank, suit) for rank in Poker.ranks for suit in Poker.suits]def __len__(self):return self._cards.__len__()def __getitem__(self, item):return self._cards[item]def __setitem__(self, key, value):self._cards[key] = valuecards = Poker()
print(cards[3])
print(len(cards))
shuffle(cards)  # 会改变每个索引对应的值，需要__setitem__
print(cards[3])
print(cards[:3])
print(choice(cards))
print(choice(cards))

双端队列

from collections import dequedq = deque()
dq.append(1)
dq.append(2)
print(dq.popleft())  # 1
print(dq.pop())  # 2

计数器

可哈希

from collections import Counteritems = ['red', 'blue', 'red', 'green', 'blue', 'blue']
print(Counter(items))  # Counter({'blue': 3, 'red': 2, 'green': 1})

带默认工厂字典

from collections import defaultdicts = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
d = defaultdict(list)
for k, v in s:d[k].append(v)  # 会有一个None到[]的自动变换，否则会报错，处理空值
# default_factory为可调用对象，用于产生一个默认值
print(sorted(d.items()))  # [('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]dd = defaultdict(lambda: 0)
print(dd['key'])  # 0

有序字典

有序字典中元素排列顺序与插入顺序保持一致，Python3.6及以前dict无序，Python3.7及以后有序

from collections import OrderedDictd = dict([('a', 1), ('b', 2), ('c', 3)])
print(d)od = OrderedDict([('a', 1), ('b', 2), ('c', 3)])
print(od)

ChainMap

倒序迭代

from collections import ChainMapbaseline = {'music': 'bach', 'art': 'rembrandt'}
adjustments = {'art': 'van gogh', 'opera': 'carmen'}
cm = ChainMap(adjustments, baseline)
for i in cm:print(i, end=" ")  # music art opera 
print(list(cm))  # ['music', 'art', 'opera']
print(cm.get('art'))  # van gogh

UserString、UserList、UserDict

当需要对内置数据类型str、list、dict进行拓展，并且想要覆写其某些标准功能时，应该使用UserDict, UserList, 和UserString,而不是直接继承str、list、dict

from collections import UserString, UserList, UserDictus = UserString("abc")
print(us)
print(us.data)ul = UserList([1, 2, 3])
print(ul)
print(ul.data)ud = UserDict([(1, 'a'), (2, 'b'), (3, 'c')])
print(ud)
print(ud.data)

实现一个自动转大写key的字段，初始化、[]设置属性__setitem__、update分别需要重写，若使用UserDict只需重写__setitem__

class UpperDict(dict):def __init__(self, data):data = {k.upper(): v for k, v in data.items()}super().__init__(data)def __setitem__(self, key, value):key = key.upper()super().__setitem__(key, value)def update(self, data):  # 方法 'UpperDict.update()' 的签名与类 'MutableMapping' 中基方法的签名不匹配data = {k.upper(): v for k, v in data.items()}super().update(data)d = UpperDict({'a': 1})
d['b'] = 1
d.update({'c': 1})
print(d)

class UpperDict(UserDict):def __setitem__(self, key, value):key = key.upper()super().__setitem__(key, value)d = UpperDict({'a': 1})
d['b'] = 1
d.update({'c': 1})
print(d)

queue

queue为Python标准库，同步队列，线程安全

import queueq = queue.Queue()
print(q.put("1"))  # None
print(q.qsize())  # 1
print(q.get())  # 1
print(q.put_nowait("2"))  # None
print(q.get_nowait())  # 2# 后进先出队列
lifo_queue = queue.LifoQueue()
print(lifo_queue.put(1))  # None
print(lifo_queue.put(2))  # None
print(lifo_queue.get())  # 2# 优先队列，最小的先出
pq = queue.PriorityQueue()
pq.put(3)
pq.put(1)
print(pq.get())  # 1
pq.put(4)
pq.put(0)
print(pq.get())  # 0

enum

enum为Python标准库，枚举

from enum import Enumclass Color(Enum):RED = 1GREEN = 2BLUE = 3print(Color.RED.name, Color.RED.value)  # RED 1

array

array为Python标准库，数字数组，纯数字操作比list效率高

from array import arraya = array('l', [1, 2, 3, 4, 5])
print(a, type(a))

heapq

heapq为Python标准库，堆队列算法（优先队列算法），堆使用小根堆，基于数组实现

import heapq
import operatorl = [5, 3, 4, 2, 1]# 堆化
heapq.heapify(l)
print(l)  # [1, 2, 4, 5, 3]
print(l[0])  # 1 第一个元素为小根堆最小值# 入堆
heapq.heappush(l, 6)
print(l)  # [1, 2, 4, 5, 3, 6]# 出堆
print(heapq.heappop(l))  # 1# 先入后出，比heappush+heappop高效
print(heapq.heappushpop(l, 0))  # 0
print(l)  # [2, 3, 4, 5, 6]# 先出后入，比heappop+heappush高效
print(heapq.heapreplace(l, 0))  # 2
print(l)  # [0, 3, 4, 5, 6]# 获取迭代器最大的n个元素
print(heapq.nlargest(10, range(100, 200, 5)))  # [195, 190, 185, 180, 175, 170, 165, 160, 155, 150]# 获取迭代器最小的n个元素
print(heapq.nsmallest(10, range(100, 200, 5)))  # [100, 105, 110, 115, 120, 125, 130, 135, 140, 145]# 若干有序迭代器元素合并，合并结果为一个生成器，日志合并示例
log_file1 = ["2023-10-23 10:12:13 [INFO] ...", "2023-10-23 10:12:14 [INFO] ...", "2023-10-23 10:12:15 [INFO] ..."]
log_file2 = ["2023-10-23 10:12:12 [INFO] ...", "2023-10-23 10:12:14 [DEBUG] ...", "2023-10-23 10:12:14 [INFO] ..."]
log_file3 = ["2023-10-23 10:12:11 [WARN] ...", "2023-10-23 10:12:18 [INFO] ...", "2023-10-23 10:12:20 [ERROR] ..."]for line in heapq.merge(log_file1, log_file2, log_file3, key=operator.itemgetter(slice(0, 20))):print(line)
# 2023-10-23 10:12:11 [WARN] ...
# 2023-10-23 10:12:12 [INFO] ...
# 2023-10-23 10:12:13 [INFO] ...
# 2023-10-23 10:12:14 [INFO] ...
# 2023-10-23 10:12:14 [DEBUG] ...
# 2023-10-23 10:12:14 [INFO] ...
# 2023-10-23 10:12:15 [INFO] ...
# 2023-10-23 10:12:18 [INFO] ...
# 2023-10-23 10:12:20 [ERROR] ...

bisect

bisect为Python标准库，基于二分查找算法定位插入点，模块中函数基于插入点设计，并非使用__eq__查找特定值，而是使用__lt__查找插入点。

公共参数说明

a，列表
x，列表元素
lo，低位下标
hi，高位下标
key，比较键

函数说明

bisect.bisect_left(a, x, lo=0, hi=len(a), *, key=None)，返回的插入点ip将数组a分为两个切片使得对于左侧切片 all(elem < x for elem in a[lo : ip]) 为真值而对于右侧切片 all(elem >= x for elem in a[ip : hi]) 为真值
bisect.bisect_right(a, x, lo=0, hi=len(a), *, key=None)，返回的插入点ip将数组a分为两个切片使得对于左侧切片 all(elem <= x for elem in a[lo : ip]) 为真值而对于右则切片 all(elem > x for elem in a[ip : hi]) 为真值
bisect.insort_right(a, x, lo=0, hi=len(a), *, key=None)，基于bisect_right函数定位插入点然后插入元素，插入算法时间复杂度为O(n)
bisect.insort_left(a, x, lo=0, hi=len(a), *, key=None)，基于bisect_left函数定位插入点然后插入元素，插入算法时间复杂度为O(n)
bisect.bisect与bisect.bisect_right等价
bisect.insort与bisect.insort_right等价

import bisect
import operator
from collections import namedtuplel = [1, 3, 5, 7, 9]print(bisect.bisect_right(l, 5))  # 3
print(bisect.bisect_left(l, 5))  # 2
print(bisect.bisect_right(l, 4))  # 2
print(bisect.bisect_left(l, 4))  # 2
print(bisect.bisect_right(l, 9))  # 5
print(bisect.bisect_left(l, 9))  # 4
print(bisect.bisect_right(l, 1))  # 1
print(bisect.bisect_left(l, 1))  # 0
print(bisect.bisect_right(l, 0))  # 0
print(bisect.bisect_left(l, 0))  # 0
print(bisect.bisect_right(l, 10))  # 5
print(bisect.bisect_left(l, 10))  # 5print(bisect.bisect_left(l, 5))  # 2
bisect.insort_left(l, 5)
print(l)  # [1, 3, 5, 5, 7, 9] 插入的是第一个5 left会定位所有 满足小于5 的元素的下一个位置  print(bisect.bisect_right(l, 5))  # 4
bisect.insort_right(l, 5)
print(l)  # [1, 3, 5, 5, 5, 7, 9] 插入的是第三个5 right会定位所有 满足大于5 的元素的上一个位置# 若数组无序则不会正常工作
a = [1, 5, 3, 6]
print(bisect.bisect_left(a, 5))  # 3
bisect.bisect_left(a, 5)  # [1, 5, 3, 6]
print(a)

基于定位插入点特性便于分段

import bisect
import operator
from collections import namedtuplefor score in [33, 99, 77, 70, 89, 90, 100]:print('FDCBA'[bisect.bisect([60, 70, 80, 90], score)])
# F A C C B A A

基于定位插入点特性不便于查找特定元素，通常需要封装一层函数

import bisect
import operator
from collections import namedtupleMovie = namedtuple('Movie', ('id', 'name', 'released', 'director'))movies = [Movie(1, 'Jaws', 1975, 'Spielberg'),Movie(3, 'Titanic', 1997, 'Cameron'),Movie(5, 'The Birds', 1963, 'Hitchcock'),Movie(7, 'Aliens', 1986, 'Cameron')
]print(bisect.bisect_left(movies, 2, key=operator.attrgetter('id')))  # 1
print(bisect.bisect_left(movies, 3, key=operator.attrgetter('id')))  # 1
print(bisect.bisect_right(movies, 3, key=operator.attrgetter('id')))  # 2def index(a, x, key=None):i = bisect.bisect_left(a, x, key=key)if i != len(a) and (key(a[i]) if key else a[i]) == x:return iraise ValueErrordef find_lt(a, x, key=None):i = bisect.bisect_left(a, x, key=key)if i:return a[i - 1]raise ValueErrordef find_le(a, x, key=None):i = bisect.bisect_right(a, x, key=key)if i:return a[i - 1]raise ValueErrordef find_gt(a, x, key=None):i = bisect.bisect_right(a, x, key=key)if i != len(a):return a[i]raise ValueErrordef find_ge(a, x, key=None):i = bisect.bisect_left(a, x, key=key)if i != len(a):return a[i]raise ValueErrorprint(index(movies, 5, key=operator.attrgetter('id')))  # 2
print(index(movies, 2, key=operator.attrgetter('id')))  # ValueError

三方有序容器库，/

浅拷贝与深拷贝，shallow copy and deep copy

import copyl = [1, 2, [3]]
copy_copy = copy.copy(l)
copy_copy[0] = '1a'
copy_copy[2][0] = '3a'
print(l, copy_copy)  # [1, 2, ['3a']] ['1a', 2, ['3a']]copy_deepcopy = copy.deepcopy(copy_copy)
copy_deepcopy[0] = 1
copy_deepcopy[2][0] = 3
print(copy_copy, copy_deepcopy)  # ['1a', 2, ['3a']] [1, 2, [3]]

更多推荐

功能基础篇7——Python基础数据结构与算法标准库

本文发布于:2023-12-04 06:33:15，感谢您对本站的认可！

本文链接:https://www.elefans.com/category/jswz/34/1660038.html