Python利用正则提取字符串中数字的方法_F11 - 专业站长和开发者的学习网站

APP正在开发中...

返回顶部

分享到

Python利用正则提取字符串中数字的方法

python　来源：互联网　作者：佚名　发布时间：2026-02-18 19:27:14　人浏览

摘要

方法1：最简单直接的方法（推荐）——先 import re，令 text = 'cn_000858'，用 numbers = re.findall(r'\d+', text) 提取所有数字，print(numbers) 输出 ['000858']；如果只需要第一个匹配结果，在 if numbers: 判断后取 result = numbers[0]。

方法1：最简单直接的方法（推荐）

import re

text = 'cn_000858'

# Collect every run of consecutive digits in the string.
numbers = re.findall(r'\d+', text)
print(numbers)  # ['000858']

# Take only the first match. The emptiness check keeps the index
# access from raising IndexError when the text contains no digits.
if numbers:
    result = numbers[0]
    print(result)  # '000858'

方法2：使用search提取第一个数字

import re

text = 'cn_000858'

# re.search returns a match object for the first digit run, or None
# when the string has no digits, so the result must be checked.
match = re.search(r'\d+', text)
if match:
    result = match.group()
    print(result)  # '000858'

方法3：精确匹配'cn_'后的数字

import re

text = 'cn_000858'

# Only digits that directly follow the literal prefix 'cn_' are captured.
match = re.search(r'cn_(\d+)', text)
if match:
    # group(1) is the first capture group, i.e. the digits without 'cn_'.
    result = match.group(1)
    print(result)  # '000858'

方法4：使用findall处理多个数字

import re

# A string that contains more than one number.
text = 'cn_000858_stock_123'

# findall returns every digit run, in order of appearance.
numbers = re.findall(r'\d+', text)
print(numbers)  # ['000858', '123']

# Pick out one specific number by anchoring on its prefix.
match = re.search(r'cn_(\d+)', text)
if match:
    stock_code = match.group(1)
    print(f"股票代码: {stock_code}")  # 股票代码: 000858

方法5：使用split分割提取

import re

text = 'cn_000858'

# Plain string approach: cut at each underscore and keep the final field.
parts = text.split('_')
*_, result = parts
print(result)  # '000858'

# Regex approach: split on every run of non-digit characters and keep
# the final piece.
digit_pieces = re.split(r'[^\d]+', text)
result = digit_pieces[-1]
print(result)  # '000858'

方法6：提取并转换为整数

import re

text = 'cn_000858'

match = re.search(r'\d+', text)
if match:
    number_str = match.group()
    # int() discards leading zeros: '000858' becomes 858.
    number_int = int(number_str)
    print(f"字符串: {number_str}, 整数: {number_int}")
    # Output: 字符串: 000858, 整数: 858

    # When the leading zeros matter, keep the value as a string. This
    # stays inside the `if` because `match` is None when nothing matched.
    result = match.group()
    print(f"保留前导零: {result}")  # 保留前导零: 000858

方法7：处理多种格式的股票代码

import re

def extract_stock_code(text):
    """Extract a stock code from text written in one of several formats.

    The patterns are tried in order and the first one that matches wins,
    so the more specific prefixes come before the generic fallbacks.
    Matching is case-insensitive.

    Args:
        text: Input string such as 'cn_000858', 'sh600000', 'sz000001'
            or '000858'.

    Returns:
        The digit string captured by the first matching pattern, or None
        when the input is empty/None or no pattern matches.
    """
    # Guard against None/empty input; re.search raises TypeError on None.
    if not text:
        return None

    patterns = [
        r'cn_(\d+)',        # cn_000858
        r'(?:sh|sz)(\d+)',  # sh600000, sz000001
        r'^(\d{6})$',       # the whole string is exactly six digits
        r'_(\d{6})',        # six digits right after an underscore
    ]

    for pattern in patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            return match.group(1)

    return None

# Demo: run the extractor over one sample of each supported format.
test_cases = [
    'cn_000858',
    'sh600000',
    'sz000001',
    '000858',
    'stock_cn_000858_data',
]

for text in test_cases:
    code = extract_stock_code(text)
    # {code} interpolates the extracted value; the input is left-aligned
    # in a 20-character column.
    print(f"{text:20} -> {code}")

方法8：使用命名捕获组（更清晰）

import re

text = 'cn_000858'

# A named group lets the captured digits be fetched by name instead of
# by position.
match = re.search(r'cn_(?P<code>\d+)', text)
if match:
    code = match.group('code')
    print(f"股票代码: {code}")  # 股票代码: 000858

    # groupdict() returns all named groups as a dictionary.
    print(match.groupdict())  # {'code': '000858'}

方法9：批量处理列表

import re

# Process several strings in one go.
texts = [
    'cn_000858',
    'cn_000001',
    'sh600000',
    'sz000002',
]

# Comprehension form: search each string exactly once, then keep only
# the hits. Strings without digits yield None and are filtered out.
matches = (re.search(r'\d+', text) for text in texts)
codes = [m.group() for m in matches if m]
print(codes)  # ['000858', '000001', '600000', '000002']

# Equivalent explicit loop, easier to extend with per-item handling.
codes = []
for text in texts:
    match = re.search(r'\d+', text)
    if match:
        codes.append(match.group())
print(codes)

方法10：完整的股票代码提取工具

import re

from typing import Optional, List

class StockCodeExtractor:
    """Extract and validate stock codes embedded in free-form text."""

    # (compiled pattern, group index) pairs in priority order. All are
    # case-insensitive. The last entry is the catch-all fallback and is
    # the only one that uses group 0 (the whole match).
    _PATTERNS = [
        (re.compile(r'cn_(\d{6})', re.IGNORECASE), 1),           # cn_000858
        (re.compile(r'(?:sh|sz|hk)(\d{6})', re.IGNORECASE), 1),  # sh600000, sz000001
        (re.compile(r'^(\d{6})$', re.IGNORECASE), 1),            # exactly six digits
        (re.compile(r'_(\d{6})', re.IGNORECASE), 1),             # six digits after '_'
        (re.compile(r'\b(\d{6})\b', re.IGNORECASE), 1),          # six digits at word boundaries
        (re.compile(r'\d+', re.IGNORECASE), 0),                  # any digit run (last resort)
    ]

    @staticmethod
    def extract(text: str) -> Optional[str]:
        """Return the first stock code found in *text*.

        Supported formats include cn_000858, sh600000, sz000001 and a
        bare 000858. A six-digit result is preferred; if only the
        catch-all pattern matches, its digit run is returned whatever
        its length (for example 'hk00700' yields '00700').

        Args:
            text: The string to search.

        Returns:
            The extracted digit string, or None when *text* is empty or
            contains no digits.
        """
        if not text:
            return None

        for pattern, group_idx in StockCodeExtractor._PATTERNS:
            match = pattern.search(text)
            if not match:
                continue

            code = match.group(group_idx)
            # Stock codes are normally six digits long.
            if len(code) == 6 and code.isdigit():
                return code
            # The catch-all pattern is accepted even when it is not six
            # digits, because nothing better matched.
            if group_idx == 0:
                return code

        return None

    @staticmethod
    def extract_all(text: str) -> List[str]:
        """Return every standalone six-digit run in *text*, in order.

        Args:
            text: The string to search.

        Returns:
            A list of six-digit strings; empty when *text* is empty or
            has no such run.
        """
        if not text:
            return []

        # Digit lookarounds are used instead of \b: '_' and letters are
        # word characters, so \b never matches between 'cn_' or 'sh' and
        # the digits, and prefixed codes would be missed. The lookarounds
        # still reject six digits that are part of a longer number.
        return re.findall(r'(?<!\d)\d{6}(?!\d)', text)

    @staticmethod
    def validate(code: str) -> bool:
        """Return True when *code* is exactly six digits and nothing else.

        Args:
            code: The candidate stock code.

        Returns:
            True for a six-digit string, False otherwise (including
            non-string input).
        """
        # fullmatch is used because '$' also matches just before a
        # trailing newline, which would accept '000858\n'.
        return isinstance(code, str) and re.fullmatch(r'\d{6}', code) is not None

# Usage example
if __name__ == "__main__":
    extractor = StockCodeExtractor()

    test_cases = [
        'cn_000858',
        '深南电路 cn_000858',
        'sh600000',
        'sz000001',
        '000858',
        '股票代码: 000858',
        'hk00700',
    ]

    print("单个提取:")
    for text in test_cases:
        code = extractor.extract(text)
        is_valid = extractor.validate(code) if code else False
        # str() keeps the width spec usable when nothing was extracted:
        # None does not accept a ':10' format spec and would raise
        # TypeError.
        print(f"{text:25} -> {str(code):10} (有效: {is_valid})")

    print("\n批量提取:")
    text = "关注股票: cn_000858, sh600000, sz000001"
    codes = extractor.extract_all(text)
    print(f"文本: {text}")
    print(f"提取的代码: {codes}")

方法11：处理您的Redis股票数据场景

import re

import json

def extract_stock_code_from_redis(data_str):
    """Extract a stock code from a value read from Redis.

    The value is first parsed as JSON:

    * For a JSON object, the fields 'code', 'stock_code', 'symbol' and
      'ts_code' are checked in that order and the first digit run found
      is returned. If none of them yields digits, every value is scanned
      and the first one whose leading digit run is exactly six digits
      long is returned.
    * For a JSON string, the first six consecutive digits are returned.
    * For anything else (not valid JSON, or JSON that decodes to a
      number, list, etc.), the first six consecutive digits of the raw
      input are returned. Without this fallback an input such as
      '600000' parses as a JSON integer and would yield nothing.

    Args:
        data_str: The raw value, as str (bytes are also accepted).

    Returns:
        The extracted digit string, or None when the input is empty/None
        or no code is found.
    """
    if not data_str:
        return None

    try:
        data = json.loads(data_str)
    except json.JSONDecodeError:
        # Not JSON: handled by the raw-text scan below.
        data = None

    if isinstance(data, dict):
        # Well-known field names first.
        for field in ('code', 'stock_code', 'symbol', 'ts_code'):
            if field in data:
                match = re.search(r'\d+', str(data[field]))
                if match:
                    return match.group()

        # Otherwise accept any value whose first digit run is six long.
        for value in data.values():
            match = re.search(r'\d+', str(value))
            if match and len(match.group()) == 6:
                return match.group()

        return None

    if isinstance(data, str):
        raw = data
    elif isinstance(data_str, (bytes, bytearray)):
        # A str pattern cannot be applied to bytes, so decode first.
        raw = data_str.decode('utf-8', errors='replace')
    else:
        raw = data_str

    match = re.search(r'\d{6}', raw)
    return match.group() if match else None

# Demo: JSON objects and plain strings.
test_data = [
    '{"name": "深南电路", "code": "cn_000858"}',
    '{"ts_code": "000858.SZ", "name": "深南电路"}',
    'cn_000858',
    '000858',
]

for data in test_data:
    code = extract_stock_code_from_redis(data)
    # {code} interpolates the extracted value; the input is left-aligned
    # in a 40-character column.
    print(f"{data:40} -> 代码: {code}")

快速解决方案

针对您的具体需求，最简单的代码：

import re

# Pull the digits out of 'cn_000858'.
text = 'cn_000858'
first_match = re.search(r'\d+', text)
result = first_match.group()
print(result)  # '000858'

# The same thing as a single expression.
result = re.findall(r'\d+', 'cn_000858')[0]
print(result)  # '000858'

正则表达式说明

模式	说明	示例
\d+	匹配一个或多个数字	'000858'
\d{6}	匹配恰好6个数字	'000858'
cn_(\d+)	匹配'cn_'后的数字	'000858'
(?:sh|sz)(\d+)	匹配sh或sz后的数字	'600000'
\b\d{6}\b	单词边界的6位数字	'000858'

选择最适合您场景的方法即可！

您可能感兴趣的文章 :

原文链接 :

Tag : 正则(5)字符串(51)

Python中dict与set的实现原理

前言：Python中的高效数据结构在Python的世界里，dict（字典）和set（集合）是两种极其重要且高效的数据结构。它们不仅在日常编程中被广泛
Python利用正则提取字符串中数字的方法

方法1：最简单直接的方法（推荐）——先 import re，令 text = 'cn_000858'，用 numbers = re.findall(r'\d+', text) 提取所有数字，print(numbers) 输出 ['000858']
python实现PDF文档提取,分割与合并操作

一、PDF提取文字/转图片：提取文字和转图片使用的是fitz模块，模块安装：pip install PyMuPDF。提取文字：import fitz，pdf = f
Python中enumerate函数的巧妙用法

在算法题目中，处理数组（List）、字符串、矩阵等可迭代对象时，同时获取索引和元素值是高频需求比如找目标元素的位置、双指针遍历
Python中的断言机制的介绍

想象你正在开发一个电商系统，有个计算商品折扣的函数。正常情况下，折扣率应该在0到1之间，但某天测试时发现某个商品折扣变成了1.
Python使用MySQL数据库进行事务处理示例

一、事务核心概念（先理解再实操）事务（Transaction）是数据库操作的最小逻辑单元，遵循ACID 原则：原子性（Atomicity）：要么全部执行成
Python多进程与多线程适用场景案例

你想明确多进程和多线程各自的适用场景，核心是要结合任务类型、资源需求、数据共享等维度来判断简单来说，IO密集型任务优先用多线程
使用Python实现一个自动整理音乐文件脚本

一、音乐文件管理的痛点与解决方案现代音乐收藏常面临杂乱无章的问题：同一艺术家的歌曲散落在不同文件夹，专辑被错误命名，甚至文
Python中as关键字的作用实例介绍

在 Python 中，as是一个关键字，核心语义是将某个对象绑定到指定的变量（或给对象起别名），从而简化代码操作、访问对象属性。它主要应
Python使用urllib和requests发送HTTP请求的方法

本文通过天气API示例演示了实际应用，并提供了超时设置、错误处理和JSON解析等实用技巧。推荐大多数场景使用requests库，同时强调了异常

Python利用正则提取字符串中数字的方法

方法1：最简单直接的方法（推荐）

方法2：使用search提取第一个数字

方法3：精确匹配'cn_'后的数字

方法4：使用findall处理多个数字

方法5：使用split分割提取

方法6：提取并转换为整数

方法7：处理多种格式的股票代码

方法8：使用命名捕获组（更清晰）

方法9：批量处理列表

方法10：完整的股票代码提取工具

方法11：处理您的Redis股票数据场景

快速解决方案

正则表达式说明

您可能感兴趣的文章 :

Python中dict与set的实现原理

Python利用正则提取字符串中数字的方法

python实现PDF文档提取,分割与合并操作

Python中enumerate函数的巧妙用法

Python中的断言机制的介绍

Python使用MySQL数据库进行事务处理示例

Python多进程与多线程适用场景案例

使用Python实现一个自动整理音乐文件脚本

Python中as关键字的作用实例介绍

Python使用urllib和requests发送HTTP请求的方法

python批量下载抖音视频

利用Pyecharts可视化微信好友的方法

python爬取豆瓣电影TOP250数据

基于tensorflow权重文件的解读

解决Python字典查找报Keyerror的问题