from bs4 import BeautifulSoup, Comment


def preprocessing(soup):
    """Strip comments and inline <script>/<style> elements from a parsed tree.

    The tree is modified in place; the same object is returned so the call
    can be used in an expression.

    Args:
        soup: A parsed BeautifulSoup document (or any Tag) to clean.

    Returns:
        The same ``soup`` object, after the nodes have been removed.
    """
    # Remove comment nodes. ``string=`` is the current name of the keyword
    # that older Beautiful Soup releases called ``text=`` (now deprecated).
    for comment in soup.find_all(string=lambda node: isinstance(node, Comment)):
        comment.extract()

    # Remove <script> tags. ``src=False`` restricts the match to tags that
    # carry no ``src`` attribute, so <script src="..."> references are kept.
    for script in soup.find_all('script', src=False):
        script.decompose()

    # Remove <style> tags (same ``src=False`` filter as above).
    for style in soup.find_all('style', src=False):
        style.decompose()

    return soup


# NOTE: ``html`` is not defined in this snippet; it must already hold the
# markup to clean before these lines run.
soup = BeautifulSoup(html, 'lxml')
# preprocessing() returns its argument, so soup2 is the same object as soup.
soup2 = preprocessing(soup)
便利。素晴らしい。
消したいタグは、そのタグ要素に対して `xxx.decompose()` を呼び出せば除去できると思う(`xxx` は `find_all()` などで取得したタグ)。